From 617e577ecf76fa8273934ff047013ebae97a8cfc Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 28 Sep 2019 18:02:46 +0300 Subject: [PATCH 001/167] FIX(build): py2 needs pinning networkx-2.2 --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bd7883f4..d3dfec84 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,10 @@ author_email='huyng@yahoo-inc.com', url='http://github.com/yahoo/graphkit', packages=['graphkit'], - install_requires=['networkx'], + install_requires=[ + "networkx; python_version >= '3.5'", + "networkx == 2.2; python_version < '3.5'", + ], extras_require={ 'plot': ['pydot', 'matplotlib'] }, From f58d14865f45f07e5125aed9b0a3f151073fa122 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 00:36:57 +0300 Subject: [PATCH 002/167] FIX(#13): BUG in plot-diagram writtin from PY2 era, were writing in text-mode in PY3. and failing as encoding error. --- graphkit/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 0df3ddf8..24c3ac37 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -422,8 +422,8 @@ def get_node_name(a): # save plot if filename: - basename, ext = os.path.splitext(filename) - with open(filename, "w") as fh: + _basename, ext = os.path.splitext(filename) + with open(filename, "wb") as fh: if ext.lower() == ".png": fh.write(g.create_png()) elif ext.lower() == ".dot": From c75a2c0cf571430161ae1bf28f84f1c982c7d760 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 00:20:40 +0300 Subject: [PATCH 003/167] doc(#13): sample code to plot workflow diagram in intro --- README.md | 13 +++++++++++++ docs/source/index.rst | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/README.md b/README.md index 0e1e95a4..af414020 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,19 @@ print(out) As you can see, any function can be used as an 
operation in GraphKit, even ones imported from system modules! +For debugging, you may plot the workflow with one of these methods: + +```python + graph.net.plot(show=True) # open a matplotlib window + graph.net.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg +``` + +> **NOTE**: For plots, `graphviz` must be in your PATH, and `pydot` & `matplotlib` python packages installed. +> You may install both when installing *graphkit* with its `plot` extras: +> ```python +> pip install graphkit[plot] +> ``` + # License Code licensed under the Apache License, Version 2.0 license. See LICENSE file for terms. diff --git a/docs/source/index.rst b/docs/source/index.rst index 5c5e505c..6b5cb690 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -69,6 +69,18 @@ Here's a Python script with an example GraphKit computation graph that produces As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! +For debugging, you may plot the workflow with one of these methods:: + + graph.net.plot(show=True) # open a matplotlib window + graph.net.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg + +.. NOTE:: + For plots, ``graphviz`` must be in your PATH, and ``pydot` & ``matplotlib`` python packages installed. 
+ You may install both when installing *graphkit* with its `plot` extras:: + + pip install graphkit[plot] + + License ------- From a005bd6b2c209c91871ea9ea266dba8e2ec7ab86 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 01:06:15 +0300 Subject: [PATCH 004/167] enh(plot): provide help msg on supported file-exts --- graphkit/network.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/graphkit/network.py b/graphkit/network.py index 24c3ac37..33b8363e 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -435,7 +435,10 @@ def get_node_name(a): elif ext.lower() == ".svg": fh.write(g.create_svg()) else: - raise Exception("Unknown file format for saving graph: %s" % ext) + raise Exception( + "Unknown file format for saving graph: %s" + " File extensions must be one of: .png .dot .jpg .jpeg .pdf .svg" + % ext) # display graph via matplotlib if show: From 506be806998b0f3d88b1d7c63ee598a978e8f4b9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 28 Sep 2019 12:53:56 +0300 Subject: [PATCH 005/167] FEAT: +TOKEN modifier for side-effect functions Tokens work as usual while solving the DAG but they are never assigned any values to/from the operation's functions. + TC included. + Docs updated.\+ Added `modifiers` superclass to facilitate identification code. + refactored FunctionalOperation._compute(). --- docs/source/operations.rst | 1 + graphkit/functional.py | 18 ++++++++++---- graphkit/modifiers.py | 51 ++++++++++++++++++++++++++++++++++++-- test/test_graphkit.py | 15 +++++++++++ 4 files changed, 78 insertions(+), 7 deletions(-) diff --git a/docs/source/operations.rst b/docs/source/operations.rst index b7b4dbad..6a5c3124 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -134,3 +134,4 @@ Modifiers on ``operation`` inputs and outputs Certain modifiers are available to apply to input or output values in ``needs`` and ``provides``, for example to designate an optional input. 
These modifiers are available in the ``graphkit.modifiers`` module: .. autoclass:: graphkit.modifiers.optional +.. autoclass:: graphkit.modifiers.token diff --git a/graphkit/functional.py b/graphkit/functional.py index 65388973..38e1b103 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -5,7 +5,7 @@ from .base import Operation, NetworkOperation from .network import Network -from .modifiers import optional +from .modifiers import modifier, optional, token class FunctionalOperation(Operation): @@ -14,7 +14,7 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): - inputs = [named_inputs[d] for d in self.needs if not isinstance(d, optional)] + inputs = [named_inputs[d] for d in self.needs if not isinstance(d, modifier)] # Find any optional inputs in named_inputs. Get only the ones that # are present there, no extra `None`s. @@ -22,13 +22,21 @@ def _compute(self, named_inputs, outputs=None): # Combine params and optionals into one big glob of keyword arguments. kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} + result = self.fn(*inputs, **kwargs) - if len(self.provides) == 1: + + # Don't expect token outputs. + provides = [n for n in self.provides if not isinstance(n, token)] + if not provides: + # All outputs were tokens. + return {} + + if len(provides) == 1: result = [result] - result = zip(self.provides, result) + result = zip(provides, result) if outputs: - outputs = set(outputs) + outputs = set(n for n in outputs if not isinstance(n, token)) result = filter(lambda x: x[0] in outputs, result) return dict(result) diff --git a/graphkit/modifiers.py b/graphkit/modifiers.py index 652a7969..2eddcbea 100644 --- a/graphkit/modifiers.py +++ b/graphkit/modifiers.py @@ -8,7 +8,12 @@ file associated with the project for terms. 
""" -class optional(str): + +class modifier(str): + pass + + +class optional(modifier): """ Input values in ``needs`` may be designated as optional using this modifier. If this modifier is applied to an input value, that value will be input to @@ -28,7 +33,7 @@ def myadd(a, b, c=0): # Designate c as an optional argument. graph = compose('mygraph')( - operator(name='myadd', needs=['a', 'b', optional('c')], provides='sum')(myadd) + operation(name='myadd', needs=['a', 'b', optional('c')], provides='sum')(myadd) ) # The graph works with and without 'c' provided as input. @@ -36,4 +41,46 @@ def myadd(a, b, c=0): assert graph({'a': 5, 'b': 2})['sum'] == 7 """ + + pass + + +class token(modifier): + """ + Inputs & outputs in ``needs`` & ``provides`` may be designated as *tokens* + using this modifier. *Tokens* work as usual while solving the DAG but + they are never assigned any values to/from the ``operation`` functions. + Specifically: + + - input tokens are NOT fed into the function; + - output tokens are NOT expected from the function. + + Their purpose is to describe functions that have side-effects. + Note that an ``operation`` with just a single *token* output return + no value at all (it is called only for its side-effects). + + A typical use case is to signify columns required to produce new ones in + pandas dataframes:: + + from graphkit import operation, compose + from graphkit.modifiers import token + + # Function appending a new dataframe column from two pre-existing ones. + def addcolumns(df): + df['sum'] = df['a'] + df['b'] + + # Designate `a`, `b` & `sum` column names as an token arguments. + graph = compose('mygraph')( + operation( + name='addcolumns', + needs=['df', token('a'), token('b')], + provides=[token('sum')])(addcolumns) + ) + + # The graph works with and without 'c' provided as input. 
+ df = pd.DataFrame({'a': [5], 'b': [2]}) + assert graph({'df': df})['sum'] == 11 + + """ + pass diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..234c8f1c 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -207,6 +207,21 @@ def addplusplus(a, b, c=0): assert 'sum' in results assert results['sum'] == sum(named_inputs.values()) +def test_tokens(): + # Function without return value. + def sideeffect(n): + n[0] += 2 + + # Designate `a`, `b` as token inp/out arguments. + graph = compose('mygraph')( + operation( + name='sideeffect', + needs=['n', modifiers.token('a')], + provides=[modifiers.token('b')])(sideeffect) + ) + + assert graph({'n': [0]})['n'] == [2] + def test_deleted_optional(): # Test that DeleteInstructions included for optionals do not raise From 4e6d543b49bb8200345145e8e367f7648d0f7e58 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 28 Sep 2019 12:54:38 +0300 Subject: [PATCH 006/167] fix(func): 1-item outs broke with token modifiers... + _norm_kwargs: + use isinstance() instead of type-equality checks, to support new modier classes; + avoid excessive dict searches with local vars. 
--- graphkit/functional.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index 38e1b103..16f07750 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -84,18 +84,20 @@ def __init__(self, fn=None, **kwargs): def _normalize_kwargs(self, kwargs): # Allow single value for needs parameter - if 'needs' in kwargs and type(kwargs['needs']) == str: - assert kwargs['needs'], "empty string provided for `needs` parameters" - kwargs['needs'] = [kwargs['needs']] + needs = kwargs['needs'] + if isinstance(needs, str) and not isinstance(needs, optional): + assert needs, "empty string provided for `needs` parameters" + kwargs['needs'] = [needs] # Allow single value for provides parameter - if 'provides' in kwargs and type(kwargs['provides']) == str: - assert kwargs['provides'], "empty string provided for `needs` parameters" - kwargs['provides'] = [kwargs['provides']] + provides = kwargs.get('provides') + if isinstance(provides, str): + assert provides, "empty string provided for `needs` parameters" + kwargs['provides'] = [provides] assert kwargs['name'], "operation needs a name" - assert type(kwargs['needs']) == list, "no `needs` parameter provided" - assert type(kwargs['provides']) == list, "no `provides` parameter provided" + assert isinstance(kwargs['needs'], list), "no `needs` parameter provided" + assert isinstance(kwargs['provides'], list), "no `provides` parameter provided" assert hasattr(kwargs['fn'], '__call__'), "operation was not provided with a callable" if type(kwargs['params']) is not dict: From 62cf0a0e29bda4f4f65427c0e72c86e9f0d34db2 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 28 Sep 2019 18:27:09 +0300 Subject: [PATCH 007/167] refact(token): undo adding of new `modifier` class... not really needed, better be explicit which modifier is searched. 
--- graphkit/functional.py | 6 ++++-- graphkit/modifiers.py | 8 ++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index 16f07750..98a9fbbb 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -5,7 +5,7 @@ from .base import Operation, NetworkOperation from .network import Network -from .modifiers import modifier, optional, token +from .modifiers import optional, token class FunctionalOperation(Operation): @@ -14,7 +14,9 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): - inputs = [named_inputs[d] for d in self.needs if not isinstance(d, modifier)] + inputs = [named_inputs[d] + for d in self.needs + if not isinstance(d, optional) and not isinstance(d, token)] # Find any optional inputs in named_inputs. Get only the ones that # are present there, no extra `None`s. diff --git a/graphkit/modifiers.py b/graphkit/modifiers.py index 2eddcbea..e6696c7c 100644 --- a/graphkit/modifiers.py +++ b/graphkit/modifiers.py @@ -9,11 +9,7 @@ """ -class modifier(str): - pass - - -class optional(modifier): +class optional(str): """ Input values in ``needs`` may be designated as optional using this modifier. If this modifier is applied to an input value, that value will be input to @@ -45,7 +41,7 @@ def myadd(a, b, c=0): pass -class token(modifier): +class token(str): """ Inputs & outputs in ``needs`` & ``provides`` may be designated as *tokens* using this modifier. 
*Tokens* work as usual while solving the DAG but From 47c6c1d90462eea88e2b25bd5041130786bc6c80 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 28 Sep 2019 18:27:50 +0300 Subject: [PATCH 008/167] enh(token.TC): test 2 reversed token workflows --- test/test_graphkit.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 234c8f1c..bd9b9990 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -209,18 +209,40 @@ def addplusplus(a, b, c=0): def test_tokens(): # Function without return value. - def sideeffect(n): - n[0] += 2 + def extend(box): + box.extend([1, 2]) + + def increment(box): + for i in range(len(box)): + box[i] += 1 # Designate `a`, `b` as token inp/out arguments. graph = compose('mygraph')( operation( - name='sideeffect', - needs=['n', modifiers.token('a')], - provides=[modifiers.token('b')])(sideeffect) + name='extend', + needs=['box', modifiers.token('a')], + provides=[modifiers.token('b')])(extend), + operation( + name='increment', + needs=['box', modifiers.token('b')], + provides=modifiers.token('c'))(increment), + ) + + assert graph({'box': [0]})['box'] == [1, 2, 3] + + # Reverse order of functions. + graph = compose('mygraph')( + operation( + name='increment', + needs=['box', modifiers.token('a')], + provides=modifiers.token('b'))(increment), + operation( + name='extend', + needs=['box', modifiers.token('b')], + provides=[modifiers.token('c')])(extend), ) - assert graph({'n': [0]})['n'] == [2] + assert graph({'box': [0]})['box'] == [1, 1, 2] def test_deleted_optional(): From 94b7291f3a72e7968703f9150440f555fe9c6f47 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 14:13:16 +0300 Subject: [PATCH 009/167] ENH(OPTIONAL): optionals per OPERATION, not to NEEDs... 
o + Closes #17 --- graphkit/base.py | 4 ++++ graphkit/functional.py | 15 +++++++++++++-- graphkit/network.py | 12 ++++++++++-- test/test_graphkit.py | 4 +++- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 1c04e8d5..d5d27353 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -26,6 +26,10 @@ class Operation(object): specific application. """ + #: Owning :class:`~.network.Network`, set when added in a network. + #: Needed by `_compute()` to detect *optional needs* from edge-attributes. + net = None + def __init__(self, **kwargs): """ Create a new layer instance. diff --git a/graphkit/functional.py b/graphkit/functional.py index 65388973..baa79474 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -14,11 +14,22 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): - inputs = [named_inputs[d] for d in self.needs if not isinstance(d, optional)] + assert self.net + + inputs = [ + named_inputs[n] + for n in self.needs + if 'optional' not in self.net.graph.get_edge_data(n, self) + ] # Find any optional inputs in named_inputs. Get only the ones that # are present there, no extra `None`s. - optionals = {n: named_inputs[n] for n in self.needs if isinstance(n, optional) and n in named_inputs} + optionals = { + n: named_inputs[n] + for n in self.needs + if 'optional' in self.net.graph.get_edge_data(n, self) + and n in named_inputs + } # Combine params and optionals into one big glob of keyword arguments. kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} diff --git a/graphkit/network.py b/graphkit/network.py index 24c3ac37..ef6268c9 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -8,6 +8,7 @@ from io import StringIO from .base import Operation +from . 
import modifiers class DataPlaceholderNode(str): @@ -73,9 +74,16 @@ def add_op(self, operation): # assert layer is only added once to graph assert operation not in self.graph.nodes(), "Operation may only be added once" + # functionalOperations don't have that set. + if not operation.net: + operation.net = self + # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: - self.graph.add_edge(DataPlaceholderNode(n), operation) + if isinstance(n, modifiers.optional): + self.graph.add_edge(DataPlaceholderNode(n), operation, optional=True) + else: + self.graph.add_edge(DataPlaceholderNode(n), operation) # add nodes and edges to graph describing what this layer provides for p in operation.provides: @@ -107,7 +115,7 @@ def compile(self): self.steps = [] # create an execution order such that each layer's needs are provided. - ordered_nodes = list(nx.dag.topological_sort(self.graph)) + ordered_nodes = list(nx.topological_sort(self.graph)) # add Operations evaluation steps, and instructions to free data. for i, node in enumerate(ordered_nodes): diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..d2bef2ed 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -32,7 +32,9 @@ def mul_op1(a, b): @operation(name='pow_op1', needs='sum_ab', provides=['sum_ab_p1', 'sum_ab_p2', 'sum_ab_p3'], params={'exponent': 3}) def pow_op1(a, exponent=2): return [math.pow(a, y) for y in range(1, exponent+1)] - + + # `_compute()` needs a` nx-DiGraph in op's `net` attribute. + compose("mock graph")(pow_op1) print(pow_op1._compute({'sum_ab':2}, ['sum_ab_p2'])) # Partial operation that is bound at a later time From 52c0d7797489866c0a2b745b44014a3bf6626286 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 01:00:48 +0300 Subject: [PATCH 010/167] enh(test): + x2 TC breaking UNSATISFIED operations... receiving partial inputs, needed for other operations. 
--- test/test_graphkit.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..a6d4dcb3 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -5,7 +5,7 @@ import pickle from pprint import pprint -from operator import add +from operator import add, sub, floordiv, mul from numpy.testing import assert_raises import graphkit.network as network @@ -184,6 +184,38 @@ def test_pruning_raises_for_bad_output(): outputs=['sum1', 'sum3', 'sum4']) +def test_unsatisfied_operations(): + # Test that operations with partial inputs are culled and not failing. + graph = compose(name="graph")( + operation(name="add", needs=["a", "b1"], provides=["a+b1"])(add), + operation(name="sub", needs=["a", "b2"], provides=["a-b2"])(sub), + ) + + exp = {"a": 10, "b1": 2, "a+b1": 12} + assert graph({"a": 10, "b1": 2}) == exp + assert graph({"a": 10, "b1": 2}, outputs=["a+b1"]) == {"a+b1": 12} + + exp = {"a": 10, "b2": 2, "a-b2": 8} + assert graph({"a": 10, "b2": 2}) == exp + assert graph({"a": 10, "b2": 2}, outputs=["a-b2"]) == {"a-b2": 8} + +def test_unsatisfied_operations_same_out(): + # Test unsatisfied pairs of operations providing the same output. + graph = compose(name="graph")( + operation(name="mul", needs=["a", "b1"], provides=["ab"])(mul), + operation(name="div", needs=["a", "b2"], provides=["ab"])(floordiv), + operation(name="add", needs=["ab", "c"], provides=["ab_plus_c"])(add), + ) + + exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} + assert graph({"a": 10, "b1": 2, "c": 1}) == exp + assert graph({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == {"ab_plus_c": 21} + + exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} + assert graph({"a": 10, "b2": 2, "c": 1}) == exp + assert graph({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == {"ab_plus_c": 6} + + def test_optional(): # Test that optional() needs work as expected. 
From bc4c2211d25466896cb5738c4fd9d00c04633e6e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 29 Sep 2019 19:51:57 +0300 Subject: [PATCH 011/167] ENH(net,#18): ignore UN-SATISFIABLE operations with partial inputs + The x2 TCs added just before are now passing. --- graphkit/network.py | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/graphkit/network.py b/graphkit/network.py index 24c3ac37..04f0dca1 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -8,6 +8,7 @@ from io import StringIO from .base import Operation +from .modifiers import optional class DataPlaceholderNode(str): @@ -141,6 +142,65 @@ def compile(self): raise TypeError("Unrecognized network graph node") + def _collect_satisfiable_needs(self, operation, inputs, satisfiables, visited): + """ + Recusrively check if operation inputs are given/calculated (satisfied), or not. + + :param satisfiables: + the set to populate with satisfiable operations + + :param visited: + a cache of operations & needs, not to visit them again + :return: + true if opearation is satisfiable + """ + assert isinstance(operation, Operation), ( + "Expected Operation, got:", + type(operation), + ) + + if operation in visited: + return visited[operation] + + + def is_need_satisfiable(need): + if need in visited: + return visited[need] + + if need in inputs: + satisfied = True + else: + need_providers = list(self.graph.predecessors(need)) + satisfied = bool(need_providers) and any( + self._collect_satisfiable_needs(op, inputs, satisfiables, visited) + for op in need_providers + ) + visited[need] = satisfied + + return satisfied + + satisfied = all( + is_need_satisfiable(need) + for need in operation.needs + if not isinstance(need, optional) + ) + if satisfied: + satisfiables.add(operation) + visited[operation] = satisfied + + return satisfied + + + def _collect_satisfiable_operations(self, nodes, inputs): + satisfiables = set() + visited = {} + for node in 
nodes: + if node not in visited and isinstance(node, Operation): + self._collect_satisfiable_needs(node, inputs, satisfiables, visited) + + return satisfiables + + def _find_necessary_steps(self, outputs, inputs): """ Determines what graph steps need to pe run to get to the requested @@ -204,6 +264,13 @@ def _find_necessary_steps(self, outputs, inputs): # Get rid of the unnecessary nodes from the set of necessary ones. necessary_nodes -= unnecessary_nodes + # Drop (un-satifiable) operations with partial inputs. + # See https://github.com/yahoo/graphkit/pull/18 + # + satisfiables = self._collect_satisfiable_operations(necessary_nodes, inputs) + for node in list(necessary_nodes): + if isinstance(node, Operation) and node not in satisfiables: + necessary_nodes.remove(node) necessary_steps = [step for step in self.steps if step in necessary_nodes] From b8daa07bc83b249276107eedf64f9a1d12f584a6 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 12:27:17 +0300 Subject: [PATCH 012/167] refact(net): drop old `dag` nx-package --- graphkit/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/network.py b/graphkit/network.py index 0df3ddf8..bb5a198c 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -107,7 +107,7 @@ def compile(self): self.steps = [] # create an execution order such that each layer's needs are provided. - ordered_nodes = list(nx.dag.topological_sort(self.graph)) + ordered_nodes = list(nx.topological_sort(self.graph)) # add Operations evaluation steps, and instructions to free data. 
for i, node in enumerate(ordered_nodes): From 12bdfe4965ab0c08606bf0551799f42057eb7776 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 15:00:21 +0300 Subject: [PATCH 013/167] ENH(core): ORDERED SETs for DETERMINISTIC results NOTE dict are not deterministic in = '3.5'", "networkx == 2.2; python_version < '3.5'", + "boltons" # for IndexSet ], extras_require={ 'plot': ['pydot', 'matplotlib'] From 489b32c0ed5cae94baa58d4a588d638959fc4e3e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 17:26:29 +0300 Subject: [PATCH 014/167] refact(net): simpilify del-instruction loop --- graphkit/network.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index db9b91de..9fbfd1e9 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -126,19 +126,16 @@ def compile(self): # Add instructions to delete predecessors as possible. A # predecessor may be deleted if it is a data placeholder that # is no longer needed by future Operations. 
- for predecessor in self.graph.predecessors(node): + for need in self.graph.pred[node]: if self._debug: - print("checking if node %s can be deleted" % predecessor) - predecessor_still_needed = False + print("checking if node %s can be deleted" % need) for future_node in ordered_nodes[i+1:]: - if isinstance(future_node, Operation): - if predecessor in future_node.needs: - predecessor_still_needed = True - break - if not predecessor_still_needed: + if isinstance(future_node, Operation) and need in future_node.needs: + break + else: if self._debug: - print(" adding delete instruction for %s" % predecessor) - self.steps.append(DeleteInstruction(predecessor)) + print(" adding delete instruction for %s" % need) + self.steps.append(DeleteInstruction(need)) else: raise TypeError("Unrecognized network graph node") From b102d44358ef1e60e6d6cffea516bcf11ffe864d Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 1 Oct 2019 19:01:13 +0300 Subject: [PATCH 015/167] REFACT(unsatisfied): doubly-recursive func --> loop on topo-sorted --- graphkit/network.py | 100 ++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 60 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 9fbfd1e9..cad561ee 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -5,7 +5,10 @@ import os import networkx as nx +from collections import defaultdict from io import StringIO +from itertools import chain + from boltons.setutils import IndexedSet as iset @@ -138,66 +141,45 @@ def compile(self): self.steps.append(DeleteInstruction(need)) else: - raise TypeError("Unrecognized network graph node") + raise TypeError("Unrecognized network graph node %s" % type(node)) - def _collect_satisfiable_needs(self, operation, inputs, satisfiables, visited): + def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): """ - Recusrively check if operation inputs are given/calculated (satisfied), or not. 
- - :param satisfiables: - the set to populate with satisfiable operations - - :param visited: - a cache of operations & needs, not to visit them again - :return: - true if opearation is satisfiable + Traverse ordered graph and mark satisfied needs on each operation, + + collecting those missing at least one. + Since the graph is ordered, as soon as we're on an operation, + all its needs have been accounted, so we can get its satisfaction. + + :param necessary_nodes: + the subset of the graph to consider but WITHOUT the initial data + (because that is what :meth:`_find_necessary_steps()` can gives us...) + :param inputs: + an iterable of the names of the input values + return: + a list of unsatisfiable operations """ - assert isinstance(operation, Operation), ( - "Expected Operation, got:", - type(operation), - ) - - if operation in visited: - return visited[operation] - - - def is_need_satisfiable(need): - if need in visited: - return visited[need] - - if need in inputs: - satisfied = True - else: - need_providers = list(self.graph.predecessors(need)) - satisfied = bool(need_providers) and any( - self._collect_satisfiable_needs(op, inputs, satisfiables, visited) - for op in need_providers - ) - visited[need] = satisfied - - return satisfied - - satisfied = all( - is_need_satisfiable(need) - for need in operation.needs - if not isinstance(need, optional) - ) - if satisfied: - satisfiables.add(operation) - visited[operation] = satisfied - - return satisfied - - - def _collect_satisfiable_operations(self, nodes, inputs): - satisfiables = set() # unordered, not iterated - visited = {} - for node in nodes: - if node not in visited and isinstance(node, Operation): - self._collect_satisfiable_needs(node, inputs, satisfiables, visited) + G = self.graph # shortcut + ok_data = set(inputs) # to collect producible data + op_satisfaction = defaultdict(set) # to collect operation satisfiable needs + unsatisfiables = [] # to collect operations with partial needs + # We also need 
inputs to mark op_satisfaction. + nodes = chain(necessary_nodes, inputs) # note that `inputs` are plain strings + for node in nx.topological_sort(G.subgraph(nodes)): + if isinstance(node, Operation): + real_needs = set(n for n in node.needs if not isinstance(n, optional)) + if real_needs.issubset(op_satisfaction[node]): + # mark all future data-provides as ok + ok_data.update(G.adj[node]) + else: + unsatisfiables.append(node) + elif isinstance(node, (DataPlaceholderNode, str)) and node in ok_data: + # mark satisfied-needs on all future operations + for future_op in G.adj[node]: + op_satisfaction[future_op].add(node) - return satisfiables + return unsatisfiables def _find_necessary_steps(self, outputs, inputs): @@ -264,12 +246,10 @@ def _find_necessary_steps(self, outputs, inputs): necessary_nodes -= unnecessary_nodes # Drop (un-satifiable) operations with partial inputs. - # See https://github.com/yahoo/graphkit/pull/18 + # See yahoo/graphkit#18 # - satisfiables = self._collect_satisfiable_operations(necessary_nodes, inputs) - for node in list(necessary_nodes): - if isinstance(node, Operation) and node not in satisfiables: - necessary_nodes.remove(node) + unsatisfiables = self._collect_unsatisfiable_operations(necessary_nodes, inputs) + necessary_nodes -= set(unsatisfiables) necessary_steps = [step for step in self.steps if step in necessary_nodes] From de0288524014c21a241d207a558e7da695265f5f Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 04:09:21 +0300 Subject: [PATCH 016/167] test(dag,#25): FAILing TC for overriding intermediate data --- test/test_graphkit.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..9a6473cf 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -184,6 +184,17 @@ def test_pruning_raises_for_bad_output(): outputs=['sum1', 'sum3', 'sum4']) +def test_pruning_not_overrides_given_intermediate(): + # Test #25: not overriding 
intermediate data when an output is not asked + graph = compose(name="graph")( + operation(name="unjustly run", needs=["a"], provides=["overriden"])(lambda a: a), + operation(name="op", needs=["overriden", "c"], provides=["asked"])(add), + ) + + assert graph({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == {"asked": 3} # that"s ok + assert graph({"a": 5, "overriden": 1, "c": 2}) == {"a": 5, "overriden": 1, "c": 2, "asked": 3} # FAILs + + def test_optional(): # Test that optional() needs work as expected. From e1454fdbdeac0a800f41fc32397a3b189ff8e7c4 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 06:23:20 +0300 Subject: [PATCH 017/167] test(dag,#24): FAILing TC for over-pruning inetermediates when outs asked --- test/test_graphkit.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 9a6473cf..a34ab7e4 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -195,6 +195,18 @@ def test_pruning_not_overrides_given_intermediate(): assert graph({"a": 5, "overriden": 1, "c": 2}) == {"a": 5, "overriden": 1, "c": 2, "asked": 3} # FAILs +def test_pruning_with_given_intermediate_and_asked_out(): + # Test pruning intermidate data is the same when outputs are (not) asked . + graph = compose(name="graph")( + operation(name="unjustly pruned", needs=["given-1"], provides=["a"])(lambda a: a), + operation(name="shortcuted", needs=["a", "b"], provides=["given-2"])(add), + operation(name="good_op", needs=["a", "given-2"], provides=["asked"])(add), + ) + + assert graph({"given-1": 5, "b": 2, "given-2": 2}) == {"given-1": 5, "b": 2, "given-2": 7, "a": 5, "b": 2, "asked": 12} # that ok # FAILS! + assert graph({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == {"asked": 12} # FAILS! + + def test_optional(): # Test that optional() needs work as expected. 
From c2730685c880eebfbb4ad66256606bd7000564d0 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 14:11:59 +0300 Subject: [PATCH 018/167] DOC(net): explain DAG solution & compilation... the later described in #21. --- graphkit/network.py | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index cad561ee..30ea7189 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -27,7 +27,7 @@ def __repr__(self): class DeleteInstruction(str): """ - An instruction for the compiled list of evaluation steps to free or delete + An instruction in the compiled list of operation steps to free or delete a Data instance from the Network's cache after it is no longer needed. """ def __repr__(self): @@ -39,6 +39,26 @@ class Network(object): This is the main network implementation. The class contains all of the code necessary to weave together operations into a directed-acyclic-graph (DAG) and pass data through. + + The computation, ie the execution of the *operations* for given *inputs* + and asked *outputs* is based on 4 data-structures: + + - The `networkx` :attr:`graph` DAG, containing interchanging layers of + :class:`Operation` and :class:`DataPlaceholderNode` nodes. + They are layed out and connected by :meth:`add_OP`. + + - the :attr:`steps` list holding all operation nodes in *execution order*. + It is constructed in :meth:`compile()` after all nodes have been added + into the `graph`. + + - The ``necessary_steps`` list which is the *DAG solution* of each run, and + is always a subset of :attr:`steps`. + It is computed by :meth:`_find_necessary_steps()` and cached in + :attr:`_necessary_steps_cache` across runs with the same inputs/outputs. + + - the :var:`cache` local-var, initialized on each run of both + ``_compute_xxx`` methods (for parallel or sequential executions), to + holding all given input & generated (aka intermediate) data values. 
""" def __init__(self, **kwargs): @@ -106,8 +126,17 @@ def show_layers(self): def compile(self): - """Create a set of steps for evaluating layers - and freeing memory as necessary""" + """ + Create a list of operations to evaluate layers and free memory asap + + + In the list :class:`DeleteInstructions` steps (DA) are inserted between + operation nodes to reduce the memory footprint of cached results. + A DA is inserted whenever a *need* is not used by any other *operation* + further down the DAG. + Note that since the *cache* is not reused across `compute()` invocations, + any memory-reductions are for as long as a single computation runs. + """ # clear compiled steps self.steps = [] @@ -150,7 +179,7 @@ def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): collecting those missing at least one. Since the graph is ordered, as soon as we're on an operation, - all its needs have been accounted, so we can get its satisfaction. + all its needs have been accounted, so we can get its satisfaction. :param necessary_nodes: the subset of the graph to consider but WITHOUT the initial data From 16d42f1bd3523d712fa3f75b28ae06100288bc61 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 15:00:05 +0300 Subject: [PATCH 019/167] TEST(prune): +Failing x2 TCs multi-out must run but not... override intermediate data. More changes only for newer pruning TCs: + refact(test): rename graph-->netop vars for results of compose(), to avoid of `graph.net.graph`. + Explain failure modes in v1.2.4 & this merged branch (#19 + #23). 
--- test/test_graphkit.py | 111 +++++++++++++++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 22 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 5c731a29..9ee74b20 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -12,6 +12,21 @@ import graphkit.modifiers as modifiers from graphkit import operation, compose, Operation + +def identity(x): + return x + + +def filtdict(d, *keys): + """ + Keep dict items with the given keys + + >>> filtdict({"a": 1, "b": 2}, "b") + {"b": 2} + """ + return type(d)(i for i in d.items() if i[0] in keys) + + def test_network(): # Sum operation, late-bind compute function @@ -185,58 +200,110 @@ def test_pruning_raises_for_bad_output(): def test_pruning_not_overrides_given_intermediate(): - # Test #25: not overriding intermediate data when an output is not asked - graph = compose(name="graph")( - operation(name="unjustly run", needs=["a"], provides=["overriden"])(lambda a: a), + # Test #25: v1.2.4 overrides intermediate data when no output asked + netop = compose(name="netop")( + operation(name="unjustly run", needs=["a"], provides=["overriden"])(identity), operation(name="op", needs=["overriden", "c"], provides=["asked"])(add), ) - assert graph({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == {"asked": 3} # that"s ok - assert graph({"a": 5, "overriden": 1, "c": 2}) == {"a": 5, "overriden": 1, "c": 2, "asked": 3} # FAILs + exp = {"a": 5, "overriden": 1, "c": 2, "asked": 3} + # v1.2.4.ok + assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + # FAILs + # - on v1.2.4 with (overriden, asked): = (5, 7) instead of (1, 3) + # - on #18(unsatisfied) + #23(ordered-sets) with (overriden, asked) = (5, 7) instead of (1, 3) + assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + + +def test_pruning_multiouts_not_override_intermediates1(): + # Test #25: v.1.2.4 overrides intermediate data when a previous operation + # must run for its other outputs (outputs asked 
or not) + netop = compose(name="netop")( + operation(name="must run", needs=["a"], provides=["overriden", "calced"]) + (lambda x: (x, 2 * x)), + operation(name="add", needs=["overriden", "calced"], provides=["asked"])(add), + ) + + exp = {"a": 5, "overriden": 1, "calced": 10, "asked": 11} + # FAILs + # - on v1.2.4 with (overriden, asked) = (5, 15) instead of (1, 11) + # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. + assert netop({"a": 5, "overriden": 1}) == exp + # FAILs + # - on v1.2.4 with KeyError: 'e', + # - on #18(unsatisfied) + #23(ordered-sets) with empty result. + assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + + +def test_pruning_multiouts_not_override_intermediates2(): + # Test #25: v.1.2.4 overrides intermediate data when a previous operation + # must run for its other outputs (outputs asked or not) + netop = compose(name="netop")( + operation(name="must run", needs=["a"], provides=["overriden", "e"]) + (lambda x: (x, 2 * x)), + operation(name="op1", needs=["overriden", "c"], provides=["d"])(add), + operation(name="op2", needs=["d", "e"], provides=["asked"])(mul), + ) + + exp = {"a": 5, "overriden": 1, "c": 2, "asked": 3} + # FAILs + # - on v1.2.4 with (overriden, asked) = (5, 70) instead of (1, 13) + # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. + assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + # FAILs + # - on v1.2.4 with KeyError: 'e', + # - on #18(unsatisfied) + #23(ordered-sets) with empty result. + assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") def test_pruning_with_given_intermediate_and_asked_out(): - # Test pruning intermidate data is the same when outputs are (not) asked . - graph = compose(name="graph")( - operation(name="unjustly pruned", needs=["given-1"], provides=["a"])(lambda a: a), + # Test #24: v1.2.4 does not prune before given intermediate data when + # outputs not asked, but does so when output asked. 
+ netop = compose(name="netop")( + operation(name="unjustly pruned", needs=["given-1"], provides=["a"])(identity), operation(name="shortcuted", needs=["a", "b"], provides=["given-2"])(add), operation(name="good_op", needs=["a", "given-2"], provides=["asked"])(add), ) - assert graph({"given-1": 5, "b": 2, "given-2": 2}) == {"given-1": 5, "b": 2, "given-2": 7, "a": 5, "b": 2, "asked": 12} # that ok # FAILS! - assert graph({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == {"asked": 12} # FAILS! + exp = {"given-1": 5, "b": 2, "given-2": 7, "a": 5, "asked": 12} + # v1.2.4 is ok + assert netop({"given-1": 5, "b": 2, "given-2": 2}) == exp + # FAILS + # - on v1.2.4 with KeyError: 'a', + # - on #19 (unsatisfied) with no result. + assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") def test_unsatisfied_operations(): # Test that operations with partial inputs are culled and not failing. - graph = compose(name="graph")( + netop = compose(name="netop")( operation(name="add", needs=["a", "b1"], provides=["a+b1"])(add), operation(name="sub", needs=["a", "b2"], provides=["a-b2"])(sub), ) - + exp = {"a": 10, "b1": 2, "a+b1": 12} - assert graph({"a": 10, "b1": 2}) == exp - assert graph({"a": 10, "b1": 2}, outputs=["a+b1"]) == {"a+b1": 12} + assert netop({"a": 10, "b1": 2}) == exp + assert netop({"a": 10, "b1": 2}, outputs=["a+b1"]) == filtdict(exp, "a+b1") exp = {"a": 10, "b2": 2, "a-b2": 8} - assert graph({"a": 10, "b2": 2}) == exp - assert graph({"a": 10, "b2": 2}, outputs=["a-b2"]) == {"a-b2": 8} + assert netop({"a": 10, "b2": 2}) == exp + assert netop({"a": 10, "b2": 2}, outputs=["a-b2"]) == filtdict(exp, "a-b2") def test_unsatisfied_operations_same_out(): # Test unsatisfied pairs of operations providing the same output. 
- graph = compose(name="graph")( + netop = compose(name="netop")( operation(name="mul", needs=["a", "b1"], provides=["ab"])(mul), operation(name="div", needs=["a", "b2"], provides=["ab"])(floordiv), operation(name="add", needs=["ab", "c"], provides=["ab_plus_c"])(add), ) - + exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} - assert graph({"a": 10, "b1": 2, "c": 1}) == exp - assert graph({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == {"ab_plus_c": 21} + assert netop({"a": 10, "b1": 2, "c": 1}) == exp + assert netop({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} - assert graph({"a": 10, "b2": 2, "c": 1}) == exp - assert graph({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == {"ab_plus_c": 6} + assert netop({"a": 10, "b2": 2, "c": 1}) == exp + assert netop({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") def test_optional(): From b92f103ee035b5e4fc8a8a49b90df96ae547e66a Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 16:11:58 +0300 Subject: [PATCH 020/167] refact(dag): call compile() before compute.compute... not after compose(). + All TCs pass ok. + NOTE this is not yet what is described in #21. 
--- graphkit/functional.py | 1 - graphkit/network.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index 9de470a5..dcbe2042 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -209,6 +209,5 @@ def order_preserving_uniquifier(seq, seen=None): net = Network() for op in operations: net.add_op(op) - net.compile() return NetworkOperation(name=self.name, needs=needs, provides=provides, params={}, net=net) diff --git a/graphkit/network.py b/graphkit/network.py index 30ea7189..9ebdfd27 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -307,11 +307,12 @@ def compute(self, outputs, named_inputs, method=None): :returns: a dictionary of output data objects, keyed by name. """ - # assert that network has been compiled - assert self.steps, "network must be compiled before calling compute." assert isinstance(outputs, (list, tuple)) or outputs is None,\ "The outputs argument must be a list" + # Compile lazily here. + if not self.steps: + self.compile() # choose a method of execution if method == "parallel": From 6d1884e9de6e3be2328aca7ce5502a028cf5caf4 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 17:07:42 +0300 Subject: [PATCH 021/167] test(dag): +TC checking DeleteInst vary when inputs change --- test/test_graphkit.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 9ee74b20..dd0ef5cf 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -270,7 +270,7 @@ def test_pruning_with_given_intermediate_and_asked_out(): assert netop({"given-1": 5, "b": 2, "given-2": 2}) == exp # FAILS # - on v1.2.4 with KeyError: 'a', - # - on #19 (unsatisfied) with no result. + # - on #18 (unsatisfied) with no result. 
assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") @@ -348,6 +348,38 @@ def addplusplus(a, b, c=0): assert 'sum2' in results +def test_deleteinstructs_vary_with_inputs(): + # Check #21: DeleteInstructions positions vary when inputs change. + netop = compose(name="netop")( + operation(name="a free without b", needs=["a"], provides=["aa"])(identity), + operation(name="satisfiable", needs=["a", "b"], provides=["ab"])(add), + operation(name="optional ab", needs=["aa", modifiers.optional("ab")], provides=["asked"]) + (lambda a, ab=10: a + ab), + ) + + inp = {"a": 2, "b": 3} + exp = inp.copy(); exp.update({"aa": 2, "ab": 5, "asked": 7}) + res = netop(inp) + assert res == exp # ok + steps11 = netop.net.steps + res = netop(inp, outputs=["asked"]) + assert res == filtdict(exp, "asked") # ok + steps12 = netop.net.steps + + inp = {"a": 2} + exp = inp.copy(); exp.update({"aa": 2, "asked": 12}) + res = netop(inp) + assert res == exp # ok + steps21 = netop.net.steps + res = netop(inp, outputs=["asked"]) + assert res == filtdict(exp, "asked") # ok + steps22 = netop.net.steps + + assert steps11 == steps12 + assert steps21 == steps22 + assert steps11 != steps21 # FAILs in v1.2.4 + #18 + assert steps12 != steps22 # FAILs in v1.2.4 + #18 + def test_parallel_execution(): import time From 619cae72af228a9f1885dbc17f97a52f1c9e2d00 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 23:28:03 +0300 Subject: [PATCH 022/167] ENH(net): move compile() after SOLVE DAG ... to pass +TC checking DeleteInst vary when inputs change. - x4 TCs still failing, and need revamp of dag-solution. 
--- graphkit/network.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 9ebdfd27..60d959b1 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -125,7 +125,7 @@ def show_layers(self): print("") - def compile(self): + def compile(self, dag): """ Create a list of operations to evaluate layers and free memory asap @@ -142,7 +142,7 @@ def compile(self): self.steps = [] # create an execution order such that each layer's needs are provided. - ordered_nodes = iset(nx.topological_sort(self.graph)) + ordered_nodes = iset(nx.topological_sort(dag)) # add Operations evaluation steps, and instructions to free data. for i, node in enumerate(ordered_nodes): @@ -280,6 +280,7 @@ def _find_necessary_steps(self, outputs, inputs): unsatisfiables = self._collect_unsatisfiable_operations(necessary_nodes, inputs) necessary_nodes -= set(unsatisfiables) + self.compile(self.graph.subgraph(necessary_nodes)) necessary_steps = [step for step in self.steps if step in necessary_nodes] # save this result in a precomputed cache for future lookup @@ -310,10 +311,6 @@ def compute(self, outputs, named_inputs, method=None): assert isinstance(outputs, (list, tuple)) or outputs is None,\ "The outputs argument must be a list" - # Compile lazily here. - if not self.steps: - self.compile() - # choose a method of execution if method == "parallel": return self._compute_thread_pool_barrier_method(named_inputs, From eff351d8e10a40a1ecc25e5de2008e7a4cc56045 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 23:21:59 +0300 Subject: [PATCH 023/167] REFACT(NET) COMPILE+COMPUTE... + Read the next doc-only commit to understand changes. + Renamed: + net.steps --> net.execution_plan. + (old)compile() --> _build_execution_plan() + _find_necessary_steps() --> (new)compile() + _solve_dag() compile() became the master function invoking _solve_dag & _build-execution_plan(), and do the caching. 
+ refact(compute()): extract common tasks from sequential/parallel. + refact show_layers() to allow full-print, geting also string (not just printing), and using custom classes for representation. + Raise AssertionError when invalid class in plan. it's a logic error, not a language type-error. --- graphkit/functional.py | 6 +- graphkit/network.py | 181 ++++++++++++++++++++--------------------- test/test_graphkit.py | 8 +- 3 files changed, 97 insertions(+), 98 deletions(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index dcbe2042..c113a298 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -190,7 +190,9 @@ def __call__(self, *operations): merge_set = iset() # Preseve given node order. for op in operations: if isinstance(op, NetworkOperation): - net_ops = filter(lambda x: isinstance(x, Operation), op.net.steps) + op.net.compile() + net_ops = filter(lambda x: isinstance(x, Operation), + op.net.execution_plan) merge_set.update(net_ops) else: merge_set.add(op) @@ -205,7 +207,7 @@ def order_preserving_uniquifier(seq, seen=None): needs = order_preserving_uniquifier(chain(*[op.needs for op in operations]), set(provides)) # unordered, not iterated - # compile network + # Build network net = Network() for op in operations: net.add_op(op) diff --git a/graphkit/network.py b/graphkit/network.py index 60d959b1..e1395141 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -72,14 +72,14 @@ def __init__(self, **kwargs): # this holds the timing information for eache layer self.times = {} - # a compiled list of steps to evaluate layers *in order* and free mem. - self.steps = [] + #: The list of operation-nodes & *instructions* needed to evaluate + #: the given inputs & asked outputs, free memory and avoid overwritting + #: any given intermediate inputs. 
+ self.execution_plan = [] - # This holds a cache of results for the _find_necessary_steps - # function, this helps speed up the compute call as well avoid - # a multithreading issue that is occuring when accessing the - # graph in networkx - self._necessary_steps_cache = {} + #: Speed up :meth:`compile()` call and avoid a multithreading issue(?) + #: that is occuring when accessing the dag in networkx. + self._cached_execution_plans = {} def add_op(self, operation): @@ -107,28 +107,27 @@ def add_op(self, operation): for p in operation.provides: self.graph.add_edge(operation, DataPlaceholderNode(p)) - # clear compiled steps (must recompile after adding new layers) - self.steps = [] + def list_layers(self, debug=False): + # Make a generic plan. + plan = self._build_execution_plan(self.graph) + return [n for n in plan if debug or isinstance(n, Operation)] - def list_layers(self): - assert self.steps, "network must be compiled before listing layers." - return [(s.name, s) for s in self.steps if isinstance(s, Operation)] - - - def show_layers(self): - """Shows info (name, needs, and provides) about all layers in this network.""" - for name, step in self.list_layers(): - print("layer_name: ", name) - print("\t", "needs: ", step.needs) - print("\t", "provides: ", step.provides) - print("") + def show_layers(self, debug=False, ret=False): + """Shows info (name, needs, and provides) about all operations in this dag.""" + s = "\n".join(repr(n) for n in self.list_layers(debug=debug)) + if ret: + return s + else: + print(s) - def compile(self, dag): + def _build_execution_plan(self, dag): """ - Create a list of operations to evaluate layers and free memory asap + Create the list of operation-nodes & *instructions* evaluating all + operations & instructions needed a) to free memory and b) avoid + overwritting given intermediate inputs. In the list :class:`DeleteInstructions` steps (DA) are inserted between operation nodes to reduce the memory footprint of cached results. 
@@ -136,10 +135,10 @@ def compile(self, dag): further down the DAG. Note that since the *cache* is not reused across `compute()` invocations, any memory-reductions are for as long as a single computation runs. + """ - # clear compiled steps - self.steps = [] + plan = [] # create an execution order such that each layer's needs are provided. ordered_nodes = iset(nx.topological_sort(dag)) @@ -152,8 +151,7 @@ def compile(self, dag): elif isinstance(node, Operation): - # add layer to list of steps - self.steps.append(node) + plan.append(node) # Add instructions to delete predecessors as possible. A # predecessor may be deleted if it is a data placeholder that @@ -167,11 +165,12 @@ def compile(self, dag): else: if self._debug: print(" adding delete instruction for %s" % need) - self.steps.append(DeleteInstruction(need)) + plan.append(DeleteInstruction(need)) else: - raise TypeError("Unrecognized network graph node %s" % type(node)) + raise AssertionError("Unrecognized network graph node %r" % node) + return plan def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): """ @@ -183,7 +182,7 @@ def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): :param necessary_nodes: the subset of the graph to consider but WITHOUT the initial data - (because that is what :meth:`_find_necessary_steps()` can gives us...) + (because that is what :meth:`compile()` can gives us...) 
:param inputs: an iterable of the names of the input values return: @@ -203,42 +202,36 @@ def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): ok_data.update(G.adj[node]) else: unsatisfiables.append(node) - elif isinstance(node, (DataPlaceholderNode, str)) and node in ok_data: - # mark satisfied-needs on all future operations - for future_op in G.adj[node]: - op_satisfaction[future_op].add(node) + elif isinstance(node, (DataPlaceholderNode, str)): # `str` are givens + if node in ok_data: + # mark satisfied-needs on all future operations + for future_op in G.adj[node]: + op_satisfaction[future_op].add(node) + else: + raise AssertionError("Unrecognized network graph node %r" % node) return unsatisfiables - def _find_necessary_steps(self, outputs, inputs): + def _solve_dag(self, outputs, inputs): """ - Determines what graph steps need to pe run to get to the requested + Determines what graph steps need to run to get to the requested outputs from the provided inputs. Eliminates steps that come before (in topological order) any inputs that have been provided. Also eliminates steps that are not on a path from the provided inputs to the requested outputs. - :param list outputs: + :param iterable outputs: A list of desired output names. This can also be ``None``, in which case the necessary steps are all graph nodes that are reachable from one of the provided inputs. - :param dict inputs: + :param iterable inputs: A dictionary mapping names to values for all provided inputs. - :returns: - Returns a list of all the steps that need to be run for the - provided inputs and requested outputs. 
- """ - - # return steps if it has already been computed before for this set of inputs and outputs - outputs = tuple(sorted(outputs)) if isinstance(outputs, (list, set, iset)) else outputs - inputs_keys = tuple(sorted(inputs.keys())) - cache_key = (inputs_keys, outputs) - if cache_key in self._necessary_steps_cache: - return self._necessary_steps_cache[cache_key] + :return: + """ graph = self.graph if not outputs: @@ -280,14 +273,31 @@ def _find_necessary_steps(self, outputs, inputs): unsatisfiables = self._collect_unsatisfiable_operations(necessary_nodes, inputs) necessary_nodes -= set(unsatisfiables) - self.compile(self.graph.subgraph(necessary_nodes)) - necessary_steps = [step for step in self.steps if step in necessary_nodes] + shrinked_dag = graph.subgraph(necessary_nodes) - # save this result in a precomputed cache for future lookup - self._necessary_steps_cache[cache_key] = necessary_steps + return shrinked_dag + + + def compile(self, outputs=(), inputs=()): + """ + See :meth:`_solve_dag()` for parameters and description + + Handles caching of solved dag and sets the :attr:`execution_plan`. + """ + + # return steps if it has already been computed before for this set of inputs and outputs + if outputs is not None and not isinstance(outputs, str): + outputs = tuple(sorted(outputs)) + inputs_keys = tuple(sorted(inputs)) + cache_key = (inputs_keys, outputs) + if cache_key in self._cached_execution_plans: + self.execution_plan = self._cached_execution_plans[cache_key] + else: + dag = self._solve_dag(outputs, inputs) + plan = self._build_execution_plan(dag) + # save this result in a precomputed cache for future lookup + self.execution_plan = self._cached_execution_plans[cache_key] = plan - # Return an ordered list of the needed steps. 
- return necessary_steps def compute(self, outputs, named_inputs, method=None): @@ -311,17 +321,31 @@ def compute(self, outputs, named_inputs, method=None): assert isinstance(outputs, (list, tuple)) or outputs is None,\ "The outputs argument must be a list" + # start with fresh data cache + cache = {} + cache.update(named_inputs) + self.compile(outputs, named_inputs.keys()) + # choose a method of execution if method == "parallel": - return self._compute_thread_pool_barrier_method(named_inputs, - outputs) + self._compute_thread_pool_barrier_method(cache) else: - return self._compute_sequential_method(named_inputs, - outputs) + self._compute_sequential_method(cache, outputs) + if not outputs: + # Return the whole cache as output, including input and + # intermediate data nodes. + return cache - def _compute_thread_pool_barrier_method(self, named_inputs, outputs, - thread_pool_size=10): + else: + # Filter outputs to just return what's needed. + # Note: list comprehensions exist in python 2.7+ + return dict(i for i in cache.items() if i[0] in outputs) + + + def _compute_thread_pool_barrier_method( + self, cache, thread_pool_size=10 + ): """ This method runs the graph using a parallel pool of thread executors. 
You may achieve lower total latency if your graph is sufficiently @@ -334,9 +358,6 @@ def _compute_thread_pool_barrier_method(self, named_inputs, outputs, self._thread_pool = Pool(thread_pool_size) pool = self._thread_pool - cache = {} - cache.update(named_inputs) - necessary_nodes = self._find_necessary_steps(outputs, named_inputs) # this keeps track of all nodes that have already executed has_executed = set() # unordered, not iterated @@ -349,7 +370,7 @@ def _compute_thread_pool_barrier_method(self, named_inputs, outputs, # the upnext list contains a list of operations for scheduling # in the current round of scheduling upnext = [] - for node in necessary_nodes: + for node in self.execution_plan: # only delete if all successors for the data node have been executed if isinstance(node, DeleteInstruction): if ready_to_delete_data_node(node, @@ -378,27 +399,13 @@ def _compute_thread_pool_barrier_method(self, named_inputs, outputs, cache.update(result) has_executed.add(op) - if not outputs: - return cache - else: - return {k: cache[k] for k in iter(cache) if k in outputs} - def _compute_sequential_method(self, named_inputs, outputs): + def _compute_sequential_method(self, cache, outputs): """ This method runs the graph one operation at a time in a single thread """ - # start with fresh data cache - cache = {} - - # add inputs to data cache - cache.update(named_inputs) - - # Find the subset of steps we need to run to get to the requested - # outputs from the provided inputs. - all_steps = self._find_necessary_steps(outputs, named_inputs) - self.times = {} - for step in all_steps: + for step in self.execution_plan: if isinstance(step, Operation): @@ -435,17 +442,7 @@ def _compute_sequential_method(self, named_inputs, outputs): cache.pop(step) else: - raise TypeError("Unrecognized instruction.") - - if not outputs: - # Return the whole cache as output, including input and - # intermediate data nodes. 
- return cache - - else: - # Filter outputs to just return what's needed. - # Note: list comprehensions exist in python 2.7+ - return {k: cache[k] for k in iter(cache) if k in outputs} + raise AssertionError("Unrecognized instruction.%r" % step) def plot(self, filename=None, show=False): diff --git a/test/test_graphkit.py b/test/test_graphkit.py index dd0ef5cf..be4b0e86 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -361,19 +361,19 @@ def test_deleteinstructs_vary_with_inputs(): exp = inp.copy(); exp.update({"aa": 2, "ab": 5, "asked": 7}) res = netop(inp) assert res == exp # ok - steps11 = netop.net.steps + steps11 = netop.net.execution_plan res = netop(inp, outputs=["asked"]) assert res == filtdict(exp, "asked") # ok - steps12 = netop.net.steps + steps12 = netop.net.execution_plan inp = {"a": 2} exp = inp.copy(); exp.update({"aa": 2, "asked": 12}) res = netop(inp) assert res == exp # ok - steps21 = netop.net.steps + steps21 = netop.net.execution_plan res = netop(inp, outputs=["asked"]) assert res == filtdict(exp, "asked") # ok - steps22 = netop.net.steps + steps22 = netop.net.execution_plan assert steps11 == steps12 assert steps21 == steps22 From d9594855e7ceba6d353fd98ba165623d74f0b6ff Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 2 Oct 2019 19:55:49 +0300 Subject: [PATCH 024/167] doc(net): explain new DAG SOLUTION --- graphkit/network.py | 59 ++++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index e1395141..0000dd52 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -27,8 +27,8 @@ def __repr__(self): class DeleteInstruction(str): """ - An instruction in the compiled list of operation steps to free or delete - a Data instance from the Network's cache after it is no longer needed. + An instruction in the *execution plan* to free or delete a Data instance + from the Network's cache after it is no longer needed. 
""" def __repr__(self): return 'DeleteInstruction("%s")' % self @@ -41,24 +41,33 @@ class Network(object): and pass data through. The computation, ie the execution of the *operations* for given *inputs* - and asked *outputs* is based on 4 data-structures: + and asked *outputs* is based on 3 data-structures: - - The `networkx` :attr:`graph` DAG, containing interchanging layers of + - The ``networkx`` :attr:`graph` DAG, containing interchanging layers of :class:`Operation` and :class:`DataPlaceholderNode` nodes. - They are layed out and connected by :meth:`add_OP`. + They are layed out and connected by repeated calls of :meth:`add_OP`. - - the :attr:`steps` list holding all operation nodes in *execution order*. - It is constructed in :meth:`compile()` after all nodes have been added - into the `graph`. + When the computation starts, :meth:`compile()` extracts a *DAG subgraph* + by *pruning* nodes based on given inputs and requested outputs. + This subgraph is used to decide the `execution_plan` (see below), and + and is cached in :attr:`_cached_execution_plans` across runs with + thre inputs/outputs as key. - - The ``necessary_steps`` list which is the *DAG solution* of each run, and - is always a subset of :attr:`steps`. - It is computed by :meth:`_find_necessary_steps()` and cached in - :attr:`_necessary_steps_cache` across runs with the same inputs/outputs. + - the :attr:`execution_plan` lists the operation-nodes & *instructions* + needed to run a complete computation. + It is built in :meth:`_build_execution_plan()` based on the subgraph + extracted above. The *instructions* items achieve the following: + + - :class:`DeleteInstruction`: delete items from values-cache as soon as + they are not needed further down the dag, to reduce memory footprint + while computing. + + - :class:`PinInstruction`: avoid overwritting any given intermediate + inputs, and still allow their producing operations to run. 
- the :var:`cache` local-var, initialized on each run of both ``_compute_xxx`` methods (for parallel or sequential executions), to - holding all given input & generated (aka intermediate) data values. + hold all given input & generated (aka intermediate) data values. """ def __init__(self, **kwargs): @@ -85,8 +94,8 @@ def __init__(self, **kwargs): def add_op(self, operation): """ Adds the given operation and its data requirements to the network graph - based on the name of the operation, the names of the operation's needs, and - the names of the data it provides. + based on the name of the operation, the names of the operation's needs, + and the names of the data it provides. :param Operation operation: Operation object to add. """ @@ -125,10 +134,13 @@ def show_layers(self, debug=False, ret=False): def _build_execution_plan(self, dag): """ Create the list of operation-nodes & *instructions* evaluating all - + operations & instructions needed a) to free memory and b) avoid overwritting given intermediate inputs. + :param dag: + as shrinked by :meth:`compile()` + In the list :class:`DeleteInstructions` steps (DA) are inserted between operation nodes to reduce the memory footprint of cached results. A DA is inserted whenever a *need* is not used by any other *operation* @@ -227,9 +239,10 @@ def _solve_dag(self, outputs, inputs): from one of the provided inputs. :param iterable inputs: - A dictionary mapping names to values for all provided inputs. + The inputs names of all given inputs. :return: + the subgraph comprising the solution """ graph = self.graph @@ -280,9 +293,17 @@ def _solve_dag(self, outputs, inputs): def compile(self, outputs=(), inputs=()): """ - See :meth:`_solve_dag()` for parameters and description + Solve dag, set the :attr:`execution_plan` and cache it. + + See :meth:`_solve_dag()` for description + + :param iterable outputs: + A list of desired output names. 
This can also be ``None``, in which + case the necessary steps are all graph nodes that are reachable + from one of the provided inputs. - Handles caching of solved dag and sets the :attr:`execution_plan`. + :param dict inputs: + The inputs names of all given inputs. """ # return steps if it has already been computed before for this set of inputs and outputs From 17eb2fdfbf63c8f798a34720abe9371a3d189137 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 3 Oct 2019 15:29:12 +0300 Subject: [PATCH 025/167] FIX(net): new Ops invalidate execution-plan cache... Probaly unreported bug in v1.2.4 for '_neccessary_steps_cache`. --- graphkit/network.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/graphkit/network.py b/graphkit/network.py index 0000dd52..06447271 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -108,6 +108,9 @@ def add_op(self, operation): # assert layer is only added once to graph assert operation not in self.graph.nodes(), "Operation may only be added once" + ## Invalidate old plans. + self._cached_execution_plans = {} + # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: self.graph.add_edge(DataPlaceholderNode(n), operation) From 0830b7ce0d5cd44f888397e909af00ec1828e494 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 3 Oct 2019 16:27:07 +0300 Subject: [PATCH 026/167] ENH(DAG): NEW SOLVER + Pruning behaves correctly also when outputs given; this happens by breaking incoming provide-links to any given intermedediate inputs. + Unsatisfied detection now includes those without outputs due to broken links (above). + Remove some uneeded "glue" from unsatisfied-detection code, leftover from previous compile() refactoring. + Renamed satisfiable --> satisfied. + Improved unknown output requested raise-message. + x3 TCs PASS, x1 in #24 and the first x2 in #25. 
- 1x TCs in #25 still FAIL, and need "Pinning" of given-inputs (the operation MUST and MUST NOT run in these cases). --- graphkit/network.py | 156 +++++++++++++++++++++++------------------- test/test_graphkit.py | 5 +- 2 files changed, 87 insertions(+), 74 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 06447271..4ef4b4c4 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -137,12 +137,12 @@ def show_layers(self, debug=False, ret=False): def _build_execution_plan(self, dag): """ Create the list of operation-nodes & *instructions* evaluating all - + operations & instructions needed a) to free memory and b) avoid overwritting given intermediate inputs. :param dag: - as shrinked by :meth:`compile()` + the original dag but "shrinked", not "broken" In the list :class:`DeleteInstructions` steps (DA) are inserted between operation nodes to reduce the memory footprint of cached results. @@ -187,45 +187,57 @@ def _build_execution_plan(self, dag): return plan - def _collect_unsatisfiable_operations(self, necessary_nodes, inputs): + def _collect_unsatisfied_operations(self, dag, inputs): """ - Traverse ordered graph and mark satisfied needs on each operation, + Traverse topologically sorted dag to collect un-satisfied operations. + + Unsatisfied operations are those suffering from ANY of the following: - collecting those missing at least one. - Since the graph is ordered, as soon as we're on an operation, - all its needs have been accounted, so we can get its satisfaction. + - They are missing at least one compulsory need-input. + Since the dag is ordered, as soon as we're on an operation, + all its needs have been accounted, so we can get its satisfaction. - :param necessary_nodes: - the subset of the graph to consider but WITHOUT the initial data - (because that is what :meth:`compile()` can gives us...) + - Their provided outputs are not linked to any data in the dag. 
+ An operation might not have any output link when :meth:`_solve_dag()` + has broken them, due to given intermediate inputs. + + :param dag: + the graph to consider :param inputs: an iterable of the names of the input values return: - a list of unsatisfiable operations + a list of unsatisfied operations to prune """ - G = self.graph # shortcut - ok_data = set(inputs) # to collect producible data - op_satisfaction = defaultdict(set) # to collect operation satisfiable needs - unsatisfiables = [] # to collect operations with partial needs - # We also need inputs to mark op_satisfaction. - nodes = chain(necessary_nodes, inputs) # note that `inputs` are plain strings - for node in nx.topological_sort(G.subgraph(nodes)): + # To collect data that will be produced. + ok_data = set(inputs) + # To colect the map of operations --> satisfied-needs. + op_satisfaction = defaultdict(set) + # To collect the operations to drop. + unsatisfied = [] + for node in nx.topological_sort(dag): if isinstance(node, Operation): - real_needs = set(n for n in node.needs if not isinstance(n, optional)) - if real_needs.issubset(op_satisfaction[node]): - # mark all future data-provides as ok - ok_data.update(G.adj[node]) + if not dag.adj[node]: + # Prune operations that ended up providing no output. + unsatisfied.append(node) else: - unsatisfiables.append(node) + real_needs = set(n for n in node.needs + if not isinstance(n, optional)) + if real_needs.issubset(op_satisfaction[node]): + # We have a satisfied operation; mark its output-data + # as ok. + ok_data.update(dag.adj[node]) + else: + # Prune operations with partial inputs. 
+ unsatisfied.append(node) elif isinstance(node, (DataPlaceholderNode, str)): # `str` are givens if node in ok_data: # mark satisfied-needs on all future operations - for future_op in G.adj[node]: + for future_op in dag.adj[node]: op_satisfaction[future_op].add(node) else: raise AssertionError("Unrecognized network graph node %r" % node) - return unsatisfiables + return unsatisfied def _solve_dag(self, outputs, inputs): @@ -245,60 +257,56 @@ def _solve_dag(self, outputs, inputs): The inputs names of all given inputs. :return: - the subgraph comprising the solution - + the *execution plan* """ - graph = self.graph - if not outputs: + dag = self.graph - # If caller requested all outputs, the necessary nodes are all - # nodes that are reachable from one of the inputs. Ignore input - # names that aren't in the graph. - necessary_nodes = set() # unordered, not iterated - for input_name in iter(inputs): - if graph.has_node(input_name): - necessary_nodes |= nx.descendants(graph, input_name) + # Ignore input names that aren't in the graph. + graph_inputs = iset(dag.nodes) & inputs # preserve order - else: + # Scream if some requested outputs aren't in the graph. + unknown_outputs = iset(outputs) - dag.nodes + if unknown_outputs: + raise ValueError( + "Unknown output node(s) requested: %s" + % ", ".join(unknown_outputs)) + + broken_dag = dag.copy() # preserve net's graph - # If the caller requested a subset of outputs, find any nodes that - # are made unecessary because we were provided with an input that's - # deeper into the network graph. Ignore input names that aren't - # in the graph. - unnecessary_nodes = set() # unordered, not iterated - for input_name in iter(inputs): - if graph.has_node(input_name): - unnecessary_nodes |= nx.ancestors(graph, input_name) - - # Find the nodes we need to be able to compute the requested - # outputs. Raise an exception if a requested output doesn't - # exist in the graph. 
- necessary_nodes = set() # unordered, not iterated - for output_name in outputs: - if not graph.has_node(output_name): - raise ValueError("graphkit graph does not have an output " - "node named %s" % output_name) - necessary_nodes |= nx.ancestors(graph, output_name) - - # Get rid of the unnecessary nodes from the set of necessary ones. - necessary_nodes -= unnecessary_nodes - - # Drop (un-satifiable) operations with partial inputs. + # Break the incoming edges to all given inputs. + # + # Nodes producing any given intermediate inputs are unecessary + # (unless they are also used elsewhere). + # To discover which ones to prune, we break their incoming edges + # and they will drop out while collecting ancestors from the outputs. + for given in graph_inputs: + broken_dag.remove_edges_from(list(broken_dag.in_edges(given))) + + if outputs: + # If caller requested specific outputs, we can prune any + # unrelated nodes further up the dag. + ending_in_outputs = set() + for input_name in outputs: + ending_in_outputs.update(nx.ancestors(dag, input_name)) + broken_dag = broken_dag.subgraph(ending_in_outputs | set(outputs)) + + + # Prune (un-satifiable) operations with partial inputs. # See yahoo/graphkit#18 # - unsatisfiables = self._collect_unsatisfiable_operations(necessary_nodes, inputs) - necessary_nodes -= set(unsatisfiables) + unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) + shrinked_dag = dag.subgraph(broken_dag.nodes - unsatisfied) - shrinked_dag = graph.subgraph(necessary_nodes) + plan = self._build_execution_plan(shrinked_dag) - return shrinked_dag + return plan def compile(self, outputs=(), inputs=()): """ - Solve dag, set the :attr:`execution_plan` and cache it. + Solve dag, set the :attr:`execution_plan`, and cache it. - See :meth:`_solve_dag()` for description + See :meth:`_solve_dag()` for detailed description. :param iterable outputs: A list of desired output names. 
This can also be ``None``, in which @@ -306,7 +314,7 @@ def compile(self, outputs=(), inputs=()): from one of the provided inputs. :param dict inputs: - The inputs names of all given inputs. + The input names of all given inputs. """ # return steps if it has already been computed before for this set of inputs and outputs @@ -317,8 +325,7 @@ def compile(self, outputs=(), inputs=()): if cache_key in self._cached_execution_plans: self.execution_plan = self._cached_execution_plans[cache_key] else: - dag = self._solve_dag(outputs, inputs) - plan = self._build_execution_plan(dag) + plan = self._solve_dag(outputs, inputs) # save this result in a precomputed cache for future lookup self.execution_plan = self._cached_execution_plans[cache_key] = plan @@ -338,6 +345,10 @@ def compute(self, outputs, named_inputs, method=None): and the values are the concrete values you want to set for the data node. + :param method: + if ``"parallel"``, launches multi-threading. + Set when invoking a composed graph or by + :meth:`~NetworkOperation.set_execution_method()`. :returns: a dictionary of output data objects, keyed by name. """ @@ -345,9 +356,10 @@ def compute(self, outputs, named_inputs, method=None): assert isinstance(outputs, (list, tuple)) or outputs is None,\ "The outputs argument must be a list" - # start with fresh data cache - cache = {} - cache.update(named_inputs) + # start with fresh data cache & overwrites + cache = named_inputs.copy() + + # Build and set :attr:`execution_plan`. 
self.compile(outputs, named_inputs.keys()) # choose a method of execution diff --git a/test/test_graphkit.py b/test/test_graphkit.py index be4b0e86..47f536b3 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -245,7 +245,7 @@ def test_pruning_multiouts_not_override_intermediates2(): operation(name="op2", needs=["d", "e"], provides=["asked"])(mul), ) - exp = {"a": 5, "overriden": 1, "c": 2, "asked": 3} + exp = {"a": 5, "overriden": 1, "c": 2, "d": 3, "e": 10, "asked": 30} # FAILs # - on v1.2.4 with (overriden, asked) = (5, 70) instead of (1, 13) # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. @@ -265,12 +265,13 @@ def test_pruning_with_given_intermediate_and_asked_out(): operation(name="good_op", needs=["a", "given-2"], provides=["asked"])(add), ) - exp = {"given-1": 5, "b": 2, "given-2": 7, "a": 5, "asked": 12} + exp = {"given-1": 5, "b": 2, "given-2": 2, "a": 5, "asked": 7} # v1.2.4 is ok assert netop({"given-1": 5, "b": 2, "given-2": 2}) == exp # FAILS # - on v1.2.4 with KeyError: 'a', # - on #18 (unsatisfied) with no result. + # FIXED on #18+#26 (new dag solver). assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") From 32409f6342b5dc136c1c6d150171a2abe9912a3e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 3 Oct 2019 20:34:44 +0300 Subject: [PATCH 027/167] enh(build): replace numpy with pytest... 
numpy was used just for its assert_raise --- setup.py | 2 +- test/test_graphkit.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index d3dfec84..d4b7c378 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ extras_require={ 'plot': ['pydot', 'matplotlib'] }, - tests_require=['numpy'], + tests_require=['pytest'], license='Apache-2.0', keywords=['graph', 'computation graph', 'DAG', 'directed acyclical graph'], classifiers=[ diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..7db2e973 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -6,7 +6,8 @@ from pprint import pprint from operator import add -from numpy.testing import assert_raises + +import pytest import graphkit.network as network import graphkit.modifiers as modifiers @@ -180,9 +181,10 @@ def test_pruning_raises_for_bad_output(): # Request two outputs we can compute and one we can't compute. Assert # that this raises a ValueError. - assert_raises(ValueError, net, {'a': 1, 'b': 2, 'c': 3, 'd': 4}, - outputs=['sum1', 'sum3', 'sum4']) - + with pytest.raises(ValueError) as exinfo: + net({'a': 1, 'b': 2, 'c': 3, 'd': 4}, + outputs=['sum1', 'sum3', 'sum4']) + assert exinfo.match('sum4') def test_optional(): # Test that optional() needs work as expected. 
From f606ed1f147cba8956012c0a35475af052bcd361 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 3 Oct 2019 20:35:56 +0300 Subject: [PATCH 028/167] feat(build): add pip-extras [test] --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d4b7c378..51d606fc 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,8 @@ "networkx == 2.2; python_version < '3.5'", ], extras_require={ - 'plot': ['pydot', 'matplotlib'] + 'plot': ['pydot', 'matplotlib'], + 'test': ['pydot', 'matplotlib', 'pytest'], }, tests_require=['pytest'], license='Apache-2.0', From 0dc1293111506ed2a2121d297d9ff2b1afde55ef Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 3 Oct 2019 19:45:45 +0300 Subject: [PATCH 029/167] WIP/FIX(prune,#26): PIN intermediate inputs if operation before must run - WIP: PARALLEL execution not adding PINS! + Insert "PinInstructions" in the execution-plan to avoid overwritting. + Add `_overwrite_collector` in `compose()` to collect re-calculated values. + FIX the last TC in #25. --- graphkit/base.py | 42 ++++++++++++++++++++---- graphkit/network.py | 74 ++++++++++++++++++++++++++++++++----------- test/test_graphkit.py | 68 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 156 insertions(+), 28 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 1c04e8d5..2e036468 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -1,5 +1,10 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
+try: + from collections import abc +except ImportError: + import collections as abc + class Data(object): """ @@ -151,9 +156,12 @@ def __init__(self, **kwargs): # set execution mode to single-threaded sequential by default self._execution_method = "sequential" + self._overwrites_collector = None def _compute(self, named_inputs, outputs=None): - return self.net.compute(outputs, named_inputs, method=self._execution_method) + return self.net.compute( + outputs, named_inputs, method=self._execution_method, + overwrites_collector=self._overwrites_collector) def __call__(self, *args, **kwargs): return self._compute(*args, **kwargs) @@ -162,15 +170,35 @@ def set_execution_method(self, method): """ Determine how the network will be executed. - Args: - method: str - If "parallel", execute graph operations concurrently - using a threadpool. + :param str method: + If "parallel", execute graph operations concurrently + using a threadpool. """ - options = ['parallel', 'sequential'] - assert method in options + choices = ['parallel', 'sequential'] + if method not in choices: + raise ValueError( + "Invalid computation method %r! Must be one of %s" + (method, choices)) self._execution_method = method + def set_overwrites_collector(self, collector): + """ + Asks to put all *overwrites* into the `collector` after computing + + An "overwrites" is intermediate value calculated but NOT stored + into the results, becaues it has been given also as an intemediate + input value, and the operation that would overwrite it MUST run for + its other results. 
+ + :param collector: + a mutable dict to be fillwed with named values + """ + if collector is not None and not isinstance(collector, abc.MutableMapping): + raise ValueError( + "Overwrites collector was not a MutableMapping, but: %r" + % collector) + self._overwrites_collector = collector + def plot(self, filename=None, show=False): self.net.plot(filename=filename, show=show) diff --git a/graphkit/network.py b/graphkit/network.py index 4ef4b4c4..a00a0c4c 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -34,6 +34,18 @@ def __repr__(self): return 'DeleteInstruction("%s")' % self +class PinInstruction(str): + """ + An instruction in the *execution plan* not to store the newly compute value + into network's values-cache but to pin it instead to some given value. + It is used ensure that given intermediate values are not overwritten when + their providing functions could not be avoided, because their other outputs + are needed elesewhere. + """ + def __repr__(self): + return 'PinInstruction("%s")' % self + + class Network(object): """ This is the main network implementation. The class contains all of the @@ -41,7 +53,7 @@ class Network(object): and pass data through. The computation, ie the execution of the *operations* for given *inputs* - and asked *outputs* is based on 3 data-structures: + and asked *outputs* is based on 4 data-structures: - The ``networkx`` :attr:`graph` DAG, containing interchanging layers of :class:`Operation` and :class:`DataPlaceholderNode` nodes. @@ -68,6 +80,12 @@ class Network(object): - the :var:`cache` local-var, initialized on each run of both ``_compute_xxx`` methods (for parallel or sequential executions), to hold all given input & generated (aka intermediate) data values. + + - the :var:`overwrites` local-var, initialized on each run of both + ``_compute_xxx`` methods (for parallel or sequential executions), to + hold values calculated but overwritten (aka "pinned") by intermediate + input-values. 
+ """ def __init__(self, **kwargs): @@ -122,7 +140,7 @@ def add_op(self, operation): def list_layers(self, debug=False): # Make a generic plan. - plan = self._build_execution_plan(self.graph) + plan = self._build_execution_plan(self.graph, ()) return [n for n in plan if debug or isinstance(n, Operation)] @@ -134,7 +152,7 @@ def show_layers(self, debug=False, ret=False): else: print(s) - def _build_execution_plan(self, dag): + def _build_execution_plan(self, dag, inputs): """ Create the list of operation-nodes & *instructions* evaluating all @@ -142,7 +160,7 @@ def _build_execution_plan(self, dag): overwritting given intermediate inputs. :param dag: - the original dag but "shrinked", not "broken" + The original dag, pruned; not broken. In the list :class:`DeleteInstructions` steps (DA) are inserted between operation nodes to reduce the memory footprint of cached results. @@ -158,11 +176,15 @@ def _build_execution_plan(self, dag): # create an execution order such that each layer's needs are provided. ordered_nodes = iset(nx.topological_sort(dag)) - # add Operations evaluation steps, and instructions to free data. + # Add Operations evaluation steps, and instructions to free and "pin" + # data. for i, node in enumerate(ordered_nodes): if isinstance(node, DataPlaceholderNode): - continue + if node in inputs and dag.pred[node]: + # Command pinning only when there is another operation + # generating this data as output. + plan.append(PinInstruction(node)) elif isinstance(node, Operation): @@ -291,13 +313,11 @@ def _solve_dag(self, outputs, inputs): broken_dag = broken_dag.subgraph(ending_in_outputs | set(outputs)) - # Prune (un-satifiable) operations with partial inputs. - # See yahoo/graphkit#18 - # + # Prune unsatisfied operations (those with partial inputs or no outputs). 
unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) - shrinked_dag = dag.subgraph(broken_dag.nodes - unsatisfied) + pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied) - plan = self._build_execution_plan(shrinked_dag) + plan = self._build_execution_plan(pruned_dag, inputs) return plan @@ -331,7 +351,8 @@ def compile(self, outputs=(), inputs=()): - def compute(self, outputs, named_inputs, method=None): + def compute( + self, outputs, named_inputs, method=None, overwrites_collector=None): """ Run the graph. Any inputs to the network must be passed in by name. @@ -350,6 +371,10 @@ def compute(self, outputs, named_inputs, method=None): Set when invoking a composed graph or by :meth:`~NetworkOperation.set_execution_method()`. + :param overwrites_collector: + (optional) a mutable dict to be fillwed with named values. + If missing, values are simply discarded. + :returns: a dictionary of output data objects, keyed by name. """ @@ -364,23 +389,34 @@ def compute(self, outputs, named_inputs, method=None): # choose a method of execution if method == "parallel": - self._compute_thread_pool_barrier_method(cache) + self._compute_thread_pool_barrier_method( + cache, overwrites_collector, named_inputs) else: - self._compute_sequential_method(cache, outputs) + self._compute_sequential_method( + cache, overwrites_collector, named_inputs, outputs) if not outputs: # Return the whole cache as output, including input and # intermediate data nodes. - return cache + result = cache else: # Filter outputs to just return what's needed. 
# Note: list comprehensions exist in python 2.7+ - return dict(i for i in cache.items() if i[0] in outputs) + result = dict(i for i in cache.items() if i[0] in outputs) + + return result + + + def _pin_data_in_cache(self, value_name, cache, inputs, overwrites): + value_name = str(value_name) + if overwrites is not None: + overwrites[value_name] = cache[value_name] + cache[value_name] = inputs[value_name] def _compute_thread_pool_barrier_method( - self, cache, thread_pool_size=10 + self, cache, overwrites, inputs, thread_pool_size=10 ): """ This method runs the graph using a parallel pool of thread executors. @@ -436,7 +472,7 @@ def _compute_thread_pool_barrier_method( has_executed.add(op) - def _compute_sequential_method(self, cache, outputs): + def _compute_sequential_method(self, cache, overwrites, inputs, outputs): """ This method runs the graph one operation at a time in a single thread """ @@ -477,6 +513,8 @@ def _compute_sequential_method(self, cache, outputs): print("removing data '%s' from cache." 
% step) cache.pop(step) + elif isinstance(step, PinInstruction): + self._pin_data_in_cache(step, cache, inputs, overwrites) else: raise AssertionError("Unrecognized instruction.%r" % step) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 47f536b3..ce9b80d6 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -13,6 +13,11 @@ from graphkit import operation, compose, Operation +def scream(*args, **kwargs): + raise AssertionError( + "Must not have run!\n args: %s\n kwargs: %s", (args, kwargs)) + + def identity(x): return x @@ -200,9 +205,9 @@ def test_pruning_raises_for_bad_output(): def test_pruning_not_overrides_given_intermediate(): - # Test #25: v1.2.4 overrides intermediate data when no output asked + # Test #25: v1.2.4 overwrites intermediate data when no output asked netop = compose(name="netop")( - operation(name="unjustly run", needs=["a"], provides=["overriden"])(identity), + operation(name="unjustly run", needs=["a"], provides=["overriden"])(scream), operation(name="op", needs=["overriden", "c"], provides=["asked"])(add), ) @@ -212,11 +217,24 @@ def test_pruning_not_overrides_given_intermediate(): # FAILs # - on v1.2.4 with (overriden, asked): = (5, 7) instead of (1, 3) # - on #18(unsatisfied) + #23(ordered-sets) with (overriden, asked) = (5, 7) instead of (1, 3) + # FIXED on #26 + assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + + ## Test OVERWITES + # + overwrites = {} + netop.set_overwrites_collector(overwrites) + assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + assert overwrites == {} # unjust must have been pruned + + overwrites = {} + netop.set_overwrites_collector(overwrites) assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + assert overwrites == {} # unjust must have been pruned def test_pruning_multiouts_not_override_intermediates1(): - # Test #25: v.1.2.4 overrides intermediate data when a previous operation + # Test #25: v.1.2.4 overwrites intermediate data when a 
previous operation # must run for its other outputs (outputs asked or not) netop = compose(name="netop")( operation(name="must run", needs=["a"], provides=["overriden", "calced"]) @@ -228,11 +246,30 @@ def test_pruning_multiouts_not_override_intermediates1(): # FAILs # - on v1.2.4 with (overriden, asked) = (5, 15) instead of (1, 11) # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. + # FIXED on #26 assert netop({"a": 5, "overriden": 1}) == exp # FAILs # - on v1.2.4 with KeyError: 'e', # - on #18(unsatisfied) + #23(ordered-sets) with empty result. + # FIXED on #26 + assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + + ## Test OVERWITES + # + overwrites = {} + netop.set_overwrites_collector(overwrites) + assert netop({"a": 5, "overriden": 1}) == exp + assert overwrites == {'overriden': 5} + + overwrites = {} + netop.set_overwrites_collector(overwrites) assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + assert overwrites == {'overriden': 5} + + # ## Test parallel + # netop.set_execution_method("parallel") + # assert netop({"a": 5, "overriden": 1}) == exp + # assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") def test_pruning_multiouts_not_override_intermediates2(): @@ -249,11 +286,25 @@ def test_pruning_multiouts_not_override_intermediates2(): # FAILs # - on v1.2.4 with (overriden, asked) = (5, 70) instead of (1, 13) # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. + # FIXED on #26 assert netop({"a": 5, "overriden": 1, "c": 2}) == exp # FAILs # - on v1.2.4 with KeyError: 'e', # - on #18(unsatisfied) + #23(ordered-sets) with empty result. 
assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + # FIXED on #26 + + ## Test OVERWITES + # + overwrites = {} + netop.set_overwrites_collector(overwrites) + assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + assert overwrites == {'overriden': 5} + + overwrites = {} + netop.set_overwrites_collector(overwrites) + assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + assert overwrites == {'overriden': 5} def test_pruning_with_given_intermediate_and_asked_out(): @@ -274,6 +325,17 @@ def test_pruning_with_given_intermediate_and_asked_out(): # FIXED on #18+#26 (new dag solver). assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + ## Test OVERWITES + # + overwrites = {} + netop.set_overwrites_collector(overwrites) + assert netop({"given-1": 5, "b": 2, "given-2": 2}) == exp + assert overwrites == {} + + overwrites = {} + netop.set_overwrites_collector(overwrites) + assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + assert overwrites == {} def test_unsatisfied_operations(): # Test that operations with partial inputs are culled and not failing. From 06f6554869832b89713250971e903ead0934716e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 4 Oct 2019 07:01:35 +0300 Subject: [PATCH 030/167] REFACT(net): part 3 of new dag-solver & pin refactoring - STILL buggy PIN on PARALLEL, 2 DISABLED TCs FAIL: - test_pruning_with_given_intermediate_and_asked_out() - test_unsatisfied_operations_same_out() + move check if value in asked outputs before cache-evicting it in build-execution-plan time - compute methods don't need outputs anymore. + test: speed up parallel/multihtread TCs by reducing delays & repetitions. + refact: network rightfully adopted stray functions for parallel processing - they all worke on the net.graph, + upd: networkx api by indexing on `dag.nodes` views. 
+ enh: add log message when deleting in parallel (in par with sequential code). + refact: var-renames, if-then-else simplifications, pythonisms. + doc: A lot! --- graphkit/network.py | 234 +++++++++++++++++++++++------------------- test/test_graphkit.py | 174 ++++++++++++++++++++----------- 2 files changed, 242 insertions(+), 166 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index a00a0c4c..d5c15539 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -1,6 +1,6 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. - +"""" The main implementation of the network of operations & data to compute. """ import time import os import networkx as nx @@ -18,8 +18,7 @@ class DataPlaceholderNode(str): """ - A node for the Network graph that describes the name of a Data instance - produced or required by a layer. + Dag node naming a data-value produced or required by an operation. """ def __repr__(self): return 'DataPlaceholderNode("%s")' % self @@ -27,8 +26,11 @@ def __repr__(self): class DeleteInstruction(str): """ - An instruction in the *execution plan* to free or delete a Data instance - from the Network's cache after it is no longer needed. + Execution step to delete a computed value from the network's ``cache``. + + It is an :attr:`Network.execution_plan` step for the data-node `str` that + frees its data-value from ``cache`` after it is no longer needed, + to reduce memory footprint while computing the pipeline. """ def __repr__(self): return 'DeleteInstruction("%s")' % self @@ -36,10 +38,14 @@ def __repr__(self): class PinInstruction(str): """ - An instruction in the *execution plan* not to store the newly compute value - into network's values-cache but to pin it instead to some given value. 
- It is used ensure that given intermediate values are not overwritten when - their providing functions could not be avoided, because their other outputs + Execution step to replace a computed value in the ``cache`` from the inputs, + + and to store the computed one in the ``overwrites`` instead + (both ``cache`` & ``overwrites`` are local-vars in :meth:`Network.compute()`). + + It is an :attr:`Network.execution_plan` step for the data-node `str` that + ensures the corresponding intermediate input-value is not overwritten when + its providing function(s) could not be pruned, because their other outputs are needed elesewhere. """ def __repr__(self): @@ -48,26 +54,36 @@ def __repr__(self): class Network(object): """ - This is the main network implementation. The class contains all of the - code necessary to weave together operations into a directed-acyclic-graph (DAG) - and pass data through. + Assemble operations & data into a directed-acyclic-graph (DAG) and run them + + based on the given input values and requested outputs. - The computation, ie the execution of the *operations* for given *inputs* - and asked *outputs* is based on 4 data-structures: + The execution of *operations* (a computation) is splitted in 2 phases: - - The ``networkx`` :attr:`graph` DAG, containing interchanging layers of + - COMPILE: prune, sort topologically the nodes in the dag, solve it, and + derive the *execution plan* (see below) based on the given *inputs* + and asked *outputs*. + + - EXECUTE: sequential or parallel invocation of the underlying functions + of the operations. + + is based on 4 data-structures: + + - the ``networkx`` :attr:`graph` DAG, containing interchanging layers of :class:`Operation` and :class:`DataPlaceholderNode` nodes. They are layed out and connected by repeated calls of :meth:`add_OP`. - When the computation starts, :meth:`compile()` extracts a *DAG subgraph* - by *pruning* nodes based on given inputs and requested outputs. 
+ The computation starts with :meth:`_solve_dag()` extracting + a *DAG subgraph* by *pruning* nodes based on given inputs and + requested outputs. This subgraph is used to decide the `execution_plan` (see below), and and is cached in :attr:`_cached_execution_plans` across runs with - thre inputs/outputs as key. + inputs/outputs as key. - - the :attr:`execution_plan` lists the operation-nodes & *instructions* - needed to run a complete computation. - It is built in :meth:`_build_execution_plan()` based on the subgraph + - the :attr:`execution_plan` is the list of the operation-nodes only + from the dag (above), topologically sorted, and interspersed with + *instructions steps* needed to complete the run. + It is built by :meth:`_build_execution_plan()` based on the subgraph dag extracted above. The *instructions* items achieve the following: - :class:`DeleteInstruction`: delete items from values-cache as soon as @@ -75,11 +91,12 @@ class Network(object): while computing. - :class:`PinInstruction`: avoid overwritting any given intermediate - inputs, and still allow their producing operations to run. + inputs, and still allow their providing operations to run + (because they are needed for their other outputs). - - the :var:`cache` local-var, initialized on each run of both - ``_compute_xxx`` methods (for parallel or sequential executions), to - hold all given input & generated (aka intermediate) data values. + - the :var:`cache` local-var in :meth:`compute()`, initialized on each run + to hold the values of the given inputs, generated (aka intermediate) data, + and output values. 
- the :var:`overwrites` local-var, initialized on each run of both ``_compute_xxx`` methods (for parallel or sequential executions), to @@ -124,7 +141,7 @@ def add_op(self, operation): assert operation.provides is not None, "Operation's 'provides' must be named" # assert layer is only added once to graph - assert operation not in self.graph.nodes(), "Operation may only be added once" + assert operation not in self.graph.nodes, "Operation may only be added once" ## Invalidate old plans. self._cached_execution_plans = {} @@ -152,7 +169,7 @@ def show_layers(self, debug=False, ret=False): else: print(s) - def _build_execution_plan(self, dag, inputs): + def _build_execution_plan(self, dag, inputs, outputs): """ Create the list of operation-nodes & *instructions* evaluating all @@ -161,6 +178,8 @@ def _build_execution_plan(self, dag, inputs): :param dag: The original dag, pruned; not broken. + :param outputs: + outp-names to decide whether to add (and which) del-instructions In the list :class:`DeleteInstructions` steps (DA) are inserted between operation nodes to reduce the memory footprint of cached results. @@ -187,9 +206,12 @@ def _build_execution_plan(self, dag, inputs): plan.append(PinInstruction(node)) elif isinstance(node, Operation): - plan.append(node) + # Keep all values in cache if not specific outputs asked. + if not outputs: + continue + # Add instructions to delete predecessors as possible. A # predecessor may be deleted if it is a data placeholder that # is no longer needed by future Operations. 
@@ -197,12 +219,16 @@ def _build_execution_plan(self, dag, inputs): if self._debug: print("checking if node %s can be deleted" % need) for future_node in ordered_nodes[i+1:]: - if isinstance(future_node, Operation) and need in future_node.needs: + if ( + isinstance(future_node, Operation) + and need in future_node.needs + ): break else: - if self._debug: - print(" adding delete instruction for %s" % need) - plan.append(DeleteInstruction(need)) + if need not in outputs: + if self._debug: + print(" adding delete instruction for %s" % need) + plan.append(DeleteInstruction(need)) else: raise AssertionError("Unrecognized network graph node %r" % node) @@ -317,7 +343,7 @@ def _solve_dag(self, outputs, inputs): unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied) - plan = self._build_execution_plan(pruned_dag, inputs) + plan = self._build_execution_plan(pruned_dag, inputs, outputs) return plan @@ -354,7 +380,7 @@ def compile(self, outputs=(), inputs=()): def compute( self, outputs, named_inputs, method=None, overwrites_collector=None): """ - Run the graph. Any inputs to the network must be passed in by name. + Solve & execute the graph, sequentially or parallel. :param list output: The names of the data node you'd like to have returned once all necessary computations are complete. 
@@ -389,11 +415,11 @@ def compute( # choose a method of execution if method == "parallel": - self._compute_thread_pool_barrier_method( + self._execute_thread_pool_barrier_method( cache, overwrites_collector, named_inputs) else: - self._compute_sequential_method( - cache, overwrites_collector, named_inputs, outputs) + self._execute_sequential_method( + cache, overwrites_collector, named_inputs) if not outputs: # Return the whole cache as output, including input and @@ -415,7 +441,7 @@ def _pin_data_in_cache(self, value_name, cache, inputs, overwrites): cache[value_name] = inputs[value_name] - def _compute_thread_pool_barrier_method( + def _execute_thread_pool_barrier_method( self, cache, overwrites, inputs, thread_pool_size=10 ): """ @@ -432,7 +458,7 @@ def _compute_thread_pool_barrier_method( # this keeps track of all nodes that have already executed - has_executed = set() # unordered, not iterated + executed_nodes = set() # unordered, not iterated # with each loop iteration, we determine a set of operations that can be # scheduled, then schedule them onto a thread pool, then collect their @@ -443,21 +469,30 @@ def _compute_thread_pool_barrier_method( # in the current round of scheduling upnext = [] for node in self.execution_plan: - # only delete if all successors for the data node have been executed - if isinstance(node, DeleteInstruction): - if ready_to_delete_data_node(node, - has_executed, - self.graph): - if node in cache: - cache.pop(node) - - # continue if this node is anything but an operation node - if not isinstance(node, Operation): - continue - - if ready_to_schedule_operation(node, has_executed, self.graph) \ - and node not in has_executed: + if ( + isinstance(node, Operation) + and self._can_schedule_operation(node, executed_nodes) + and node not in executed_nodes + ): upnext.append(node) + elif isinstance(node, DeleteInstruction): + # Only delete if all successors for the data node + # have been executed. 
+                    # Always and repeatedly pin the value, even if not all
% step) + del cache[step] elif isinstance(step, PinInstruction): self._pin_data_in_cache(step, cache, inputs, overwrites) @@ -550,7 +579,7 @@ def get_node_name(a): g = pydot.Dot(graph_type="digraph") # draw nodes - for nx_node in self.graph.nodes(): + for nx_node in self.graph.nodes: if isinstance(nx_node, DataPlaceholderNode): node = pydot.Node(name=nx_node, shape="rect") else: @@ -592,50 +621,45 @@ def get_node_name(a): return g -def ready_to_schedule_operation(op, has_executed, graph): - """ - Determines if a Operation is ready to be scheduled for execution based on - what has already been executed. + def _can_schedule_operation(self, op, executed_nodes): + """ + Determines if a Operation is ready to be scheduled for execution + + based on what has already been executed. - Args: - op: + :param op: The Operation object to check - has_executed: set + :param set executed_nodes A set containing all operations that have been executed so far - graph: - The networkx graph containing the operations and data nodes - Returns: - A boolean indicating whether the operation may be scheduled for - execution based on what has already been executed. - """ - # unordered, not iterated - dependencies = set(filter(lambda v: isinstance(v, Operation), - nx.ancestors(graph, op))) - return dependencies.issubset(has_executed) + :return: + A boolean indicating whether the operation may be scheduled for + execution based on what has already been executed. + """ + # unordered, not iterated + dependencies = set(n for n in nx.ancestors(self.graph, op) + if isinstance(n, Operation)) + return dependencies.issubset(executed_nodes) -def ready_to_delete_data_node(name, has_executed, graph): - """ - Determines if a DataPlaceholderNode is ready to be deleted from the - cache. - Args: - name: + def _can_evict_value(self, name, executed_nodes): + """ + Determines if a DataPlaceholderNode is ready to be deleted from cache. 
+        Return the data node from a graph using its name, or None.
"c": 2}, ["asked"]) == filtdict(exp, "asked") # FAILs # - on v1.2.4 with (overriden, asked): = (5, 7) instead of (1, 3) # - on #18(unsatisfied) + #23(ordered-sets) with (overriden, asked) = (5, 7) instead of (1, 3) # FIXED on #26 - assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp ## Test OVERWITES # overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") assert overwrites == {} # unjust must have been pruned - + overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp assert overwrites == {} # unjust must have been pruned def test_pruning_multiouts_not_override_intermediates1(): # Test #25: v.1.2.4 overwrites intermediate data when a previous operation # must run for its other outputs (outputs asked or not) - netop = compose(name="netop")( + pipeline = compose(name="pipeline")( operation(name="must run", needs=["a"], provides=["overriden", "calced"]) (lambda x: (x, 2 * x)), operation(name="add", needs=["overriden", "calced"], provides=["asked"])(add), @@ -247,35 +248,36 @@ def test_pruning_multiouts_not_override_intermediates1(): # - on v1.2.4 with (overriden, asked) = (5, 15) instead of (1, 11) # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. # FIXED on #26 - assert netop({"a": 5, "overriden": 1}) == exp + assert pipeline({"a": 5, "overriden": 1}) == exp # FAILs # - on v1.2.4 with KeyError: 'e', # - on #18(unsatisfied) + #23(ordered-sets) with empty result. 
# FIXED on #26 - assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") ## Test OVERWITES # overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"a": 5, "overriden": 1}) == exp + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1}) == exp assert overwrites == {'overriden': 5} overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") assert overwrites == {'overriden': 5} - # ## Test parallel - # netop.set_execution_method("parallel") - # assert netop({"a": 5, "overriden": 1}) == exp - # assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + ## Test parallel + # + pipeline.set_execution_method("parallel") + assert pipeline({"a": 5, "overriden": 1}) == exp + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") def test_pruning_multiouts_not_override_intermediates2(): # Test #25: v.1.2.4 overrides intermediate data when a previous operation # must run for its other outputs (outputs asked or not) - netop = compose(name="netop")( + pipeline = compose(name="pipeline")( operation(name="must run", needs=["a"], provides=["overriden", "e"]) (lambda x: (x, 2 * x)), operation(name="op1", needs=["overriden", "c"], provides=["d"])(add), @@ -287,30 +289,36 @@ def test_pruning_multiouts_not_override_intermediates2(): # - on v1.2.4 with (overriden, asked) = (5, 70) instead of (1, 13) # - on #18(unsatisfied) + #23(ordered-sets) like v1.2.4. 
# FIXED on #26 - assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp # FAILs # - on v1.2.4 with KeyError: 'e', # - on #18(unsatisfied) + #23(ordered-sets) with empty result. - assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") # FIXED on #26 ## Test OVERWITES # overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"a": 5, "overriden": 1, "c": 2}) == exp + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp assert overwrites == {'overriden': 5} overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") assert overwrites == {'overriden': 5} + ## Test parallel + # + pipeline.set_execution_method("parallel") + assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp + assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + def test_pruning_with_given_intermediate_and_asked_out(): # Test #24: v1.2.4 does not prune before given intermediate data when # outputs not asked, but does so when output asked. 
- netop = compose(name="netop")( + pipeline = compose(name="pipeline")( operation(name="unjustly pruned", needs=["given-1"], provides=["a"])(identity), operation(name="shortcuted", needs=["a", "b"], provides=["given-2"])(add), operation(name="good_op", needs=["a", "given-2"], provides=["asked"])(add), @@ -318,55 +326,83 @@ def test_pruning_with_given_intermediate_and_asked_out(): exp = {"given-1": 5, "b": 2, "given-2": 2, "a": 5, "asked": 7} # v1.2.4 is ok - assert netop({"given-1": 5, "b": 2, "given-2": 2}) == exp + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp # FAILS # - on v1.2.4 with KeyError: 'a', # - on #18 (unsatisfied) with no result. # FIXED on #18+#26 (new dag solver). - assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") ## Test OVERWITES # overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"given-1": 5, "b": 2, "given-2": 2}) == exp + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp assert overwrites == {} overwrites = {} - netop.set_overwrites_collector(overwrites) - assert netop({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") assert overwrites == {} + ## Test parallel + # + pipeline.set_execution_method("parallel") + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + def test_unsatisfied_operations(): # Test that operations with partial inputs are culled and not failing. 
- netop = compose(name="netop")( + pipeline = compose(name="pipeline")( operation(name="add", needs=["a", "b1"], provides=["a+b1"])(add), operation(name="sub", needs=["a", "b2"], provides=["a-b2"])(sub), ) exp = {"a": 10, "b1": 2, "a+b1": 12} - assert netop({"a": 10, "b1": 2}) == exp - assert netop({"a": 10, "b1": 2}, outputs=["a+b1"]) == filtdict(exp, "a+b1") + assert pipeline({"a": 10, "b1": 2}) == exp + assert pipeline({"a": 10, "b1": 2}, outputs=["a+b1"]) == filtdict(exp, "a+b1") exp = {"a": 10, "b2": 2, "a-b2": 8} - assert netop({"a": 10, "b2": 2}) == exp - assert netop({"a": 10, "b2": 2}, outputs=["a-b2"]) == filtdict(exp, "a-b2") + assert pipeline({"a": 10, "b2": 2}) == exp + assert pipeline({"a": 10, "b2": 2}, outputs=["a-b2"]) == filtdict(exp, "a-b2") + + ## Test parallel + # + pipeline.set_execution_method("parallel") + exp = {"a": 10, "b1": 2, "a+b1": 12} + assert pipeline({"a": 10, "b1": 2}) == exp + assert pipeline({"a": 10, "b1": 2}, outputs=["a+b1"]) == filtdict(exp, "a+b1") + + exp = {"a": 10, "b2": 2, "a-b2": 8} + assert pipeline({"a": 10, "b2": 2}) == exp + assert pipeline({"a": 10, "b2": 2}, outputs=["a-b2"]) == filtdict(exp, "a-b2") def test_unsatisfied_operations_same_out(): # Test unsatisfied pairs of operations providing the same output. 
- netop = compose(name="netop")( + pipeline = compose(name="pipeline")( operation(name="mul", needs=["a", "b1"], provides=["ab"])(mul), operation(name="div", needs=["a", "b2"], provides=["ab"])(floordiv), operation(name="add", needs=["ab", "c"], provides=["ab_plus_c"])(add), ) exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} - assert netop({"a": 10, "b1": 2, "c": 1}) == exp - assert netop({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp + assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + + exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} + assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp + assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + + ## Test parallel + # + pipeline.set_execution_method("parallel") + exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} + assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp + assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} - assert netop({"a": 10, "b2": 2, "c": 1}) == exp - assert netop({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp + assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") def test_optional(): @@ -413,7 +449,10 @@ def addplusplus(a, b, c=0): def test_deleteinstructs_vary_with_inputs(): # Check #21: DeleteInstructions positions vary when inputs change. 
- netop = compose(name="netop")( + def count_deletions(steps): + return sum(isinstance(n, DeleteInstruction) for n in steps) + + pipeline = compose(name="pipeline")( operation(name="a free without b", needs=["a"], provides=["aa"])(identity), operation(name="satisfiable", needs=["a", "b"], provides=["ab"])(add), operation(name="optional ab", needs=["aa", modifiers.optional("ab")], provides=["asked"]) @@ -422,43 +461,56 @@ def test_deleteinstructs_vary_with_inputs(): inp = {"a": 2, "b": 3} exp = inp.copy(); exp.update({"aa": 2, "ab": 5, "asked": 7}) - res = netop(inp) + res = pipeline(inp) assert res == exp # ok - steps11 = netop.net.execution_plan - res = netop(inp, outputs=["asked"]) + steps11 = pipeline.net.execution_plan + res = pipeline(inp, outputs=["asked"]) assert res == filtdict(exp, "asked") # ok - steps12 = netop.net.execution_plan + steps12 = pipeline.net.execution_plan inp = {"a": 2} exp = inp.copy(); exp.update({"aa": 2, "asked": 12}) - res = netop(inp) + res = pipeline(inp) assert res == exp # ok - steps21 = netop.net.execution_plan - res = netop(inp, outputs=["asked"]) + steps21 = pipeline.net.execution_plan + res = pipeline(inp, outputs=["asked"]) assert res == filtdict(exp, "asked") # ok - steps22 = netop.net.execution_plan + steps22 = pipeline.net.execution_plan + + # When no outs, no del-instructs. 
+ assert steps11 != steps12 + assert count_deletions(steps11) == 0 + assert steps21 != steps22 + assert count_deletions(steps21) == 0 + + # Check steps vary with inputs + # + # FAILs in v1.2.4 + #18, PASS in #26 + assert steps11 != steps21 - assert steps11 == steps12 - assert steps21 == steps22 - assert steps11 != steps21 # FAILs in v1.2.4 + #18 - assert steps12 != steps22 # FAILs in v1.2.4 + #18 + # Check deletes vary with inputs + # + # FAILs in v1.2.4 + #18, PASS in #26 + assert count_deletions(steps12) != count_deletions(steps22) def test_parallel_execution(): import time + delay = 0.5 + def fn(x): - time.sleep(1) + time.sleep(delay) print("fn %s" % (time.time() - t0)) return 1 + x def fn2(a,b): - time.sleep(1) + time.sleep(delay) print("fn2 %s" % (time.time() - t0)) return a+b def fn3(z, k=1): - time.sleep(1) + time.sleep(delay) print("fn3 %s" % (time.time() - t0)) return z + k @@ -527,8 +579,8 @@ def infer(i): assert tuple(sorted(results.keys())) == tuple(sorted(outputs)), (outputs, results) return results - N = 100 - for i in range(20, 200): + N = 33 + for i in range(13, 61): pool = Pool(i) pool.map(infer, range(N)) pool.close() From 1cc733ef8c51e62a9e53b0f4ddf3d21138c4ac3b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 00:33:06 +0300 Subject: [PATCH 031/167] enh(CI): +PY3.6 where dicts are stable --- .travis.yml | 1 + test/test_graphkit.py | 24 +++++++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index d8657a8f..3350051a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - "2.7" - "3.4" - "3.5" + - "3.6" install: - pip install Sphinx sphinx_rtd_theme codecov packaging diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 0afea72d..cc0221d2 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -346,10 +346,11 @@ def test_pruning_with_given_intermediate_and_asked_out(): assert overwrites == {} ## Test parallel + # FAIL! in #26! 
# - pipeline.set_execution_method("parallel") - assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp - assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + # pipeline.set_execution_method("parallel") + # assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp + # assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") def test_unsatisfied_operations(): # Test that operations with partial inputs are culled and not failing. @@ -394,15 +395,16 @@ def test_unsatisfied_operations_same_out(): assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") ## Test parallel + # FAIL! in #26 # - pipeline.set_execution_method("parallel") - exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} - assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp - assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") - - exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} - assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp - assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + # pipeline.set_execution_method("parallel") + # exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} + # assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp + # assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + + # exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} + # assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp + # assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") def test_optional(): From cd1370b9c6ee022ff056e020633b15c1089fbf87 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 01:20:28 +0300 Subject: [PATCH 032/167] TEST(plot,ci): test plotting; pip install extras in Travis --- .travis.yml | 7 ++++++- test/test_graphkit.py | 20 ++++++++++++++++++++ 2 files 
changed, 26 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d8657a8f..588f64a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,11 +4,16 @@ python: - "2.7" - "3.4" - "3.5" +addons: + apt: + packages: + - graphviz + install: - pip install Sphinx sphinx_rtd_theme codecov packaging - "python -c $'import os, packaging.version as version\\nv = version.parse(os.environ.get(\"TRAVIS_TAG\", \"1.0\")).public\\nwith open(\"VERSION\", \"w\") as f: f.write(v)'" - - python setup.py install + - pip install .[plot] - cd docs - make clean html - cd .. diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bd97b317..b4681121 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -3,6 +3,10 @@ import math import pickle +import os.path as osp +import shutil +import tempfile + from pprint import pprint from operator import add @@ -317,6 +321,22 @@ def infer(i): pool.close() +def test_plotting(): + sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) + sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) + sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) + net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) + + for ext in ".png .dot .jpg .jpeg .pdf .svg".split(): + tdir = tempfile.mkdtemp(suffix=ext) + png_file = osp.join(tdir, "workflow.png") + net1.net.plot(png_file) + try: + assert osp.exists(png_file) + finally: + shutil.rmtree(tdir, ignore_errors=True) + + #################################### # Backwards compatibility #################################### From f6766627bb5e5f0226d3625cd4d6906655e7ff56 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 01:38:41 +0300 Subject: [PATCH 033/167] fix(plot): don't create file on unsupported formats thanks to @andres-fr. 
--- graphkit/network.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 33b8363e..9280a891 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -422,23 +422,23 @@ def get_node_name(a): # save plot if filename: + supported_plot_formaters = { + ".png": g.create_png, + ".dot": g.to_string, + ".jpg": g.create_jpeg, + ".jpeg": g.create_jpeg, + ".pdf": g.create_pdf, + ".svg": g.create_svg, + } _basename, ext = os.path.splitext(filename) + plot_formater = supported_plot_formaters.get(ext.lower()) + if not plot_formater: + raise Exception( + "Unknown file format for saving graph: %s" + " File extensions must be one of: .png .dot .jpg .jpeg .pdf .svg" + % ext) with open(filename, "wb") as fh: - if ext.lower() == ".png": - fh.write(g.create_png()) - elif ext.lower() == ".dot": - fh.write(g.to_string()) - elif ext.lower() in [".jpg", ".jpeg"]: - fh.write(g.create_jpeg()) - elif ext.lower() == ".pdf": - fh.write(g.create_pdf()) - elif ext.lower() == ".svg": - fh.write(g.create_svg()) - else: - raise Exception( - "Unknown file format for saving graph: %s" - " File extensions must be one of: .png .dot .jpg .jpeg .pdf .svg" - % ext) + fh.write(plot_formater()) # display graph via matplotlib if show: From 65d1816b39a08fc9dfd12910d68e3ebcb68aba06 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 30 Sep 2019 01:54:03 +0300 Subject: [PATCH 034/167] enh(plot.TC): expose supported writers and TC on them --- graphkit/network.py | 32 ++++++++++++++++++-------------- test/test_graphkit.py | 11 ++++++++++- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 9280a891..f0c7444a 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -375,6 +375,17 @@ def _compute_sequential_method(self, named_inputs, outputs): return {k: cache[k] for k in iter(cache) if k in outputs} + @staticmethod + def 
supported_plot_writers(): + return { + ".png": lambda gplot: gplot.create_png(), + ".dot": lambda gplot: gplot.to_string(), + ".jpg": lambda gplot: gplot.create_jpeg(), + ".jpeg": lambda gplot: gplot.create_jpeg(), + ".pdf": lambda gplot: gplot.create_pdf(), + ".svg": lambda gplot: gplot.create_svg(), + } + def plot(self, filename=None, show=False): """ Plot the graph. @@ -422,23 +433,16 @@ def get_node_name(a): # save plot if filename: - supported_plot_formaters = { - ".png": g.create_png, - ".dot": g.to_string, - ".jpg": g.create_jpeg, - ".jpeg": g.create_jpeg, - ".pdf": g.create_pdf, - ".svg": g.create_svg, - } _basename, ext = os.path.splitext(filename) - plot_formater = supported_plot_formaters.get(ext.lower()) - if not plot_formater: - raise Exception( + writers = Network.supported_plot_writers() + plot_writer = Network.supported_plot_writers().get(ext.lower()) + if not plot_writer: + raise ValueError( "Unknown file format for saving graph: %s" - " File extensions must be one of: .png .dot .jpg .jpeg .pdf .svg" - % ext) + " File extensions must be one of: %s" + % (ext, ' '.join(writers))) with open(filename, "wb") as fh: - fh.write(plot_formater()) + fh.write(plot_writer(g)) # display graph via matplotlib if show: diff --git a/test/test_graphkit.py b/test/test_graphkit.py index b4681121..bdd0ab37 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -327,7 +327,7 @@ def test_plotting(): sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) - for ext in ".png .dot .jpg .jpeg .pdf .svg".split(): + for ext in network.Network.supported_plot_writers(): tdir = tempfile.mkdtemp(suffix=ext) png_file = osp.join(tdir, "workflow.png") net1.net.plot(png_file) @@ -335,6 +335,15 @@ def test_plotting(): assert osp.exists(png_file) finally: shutil.rmtree(tdir, ignore_errors=True) + try: + net1.net.plot('bad.format') + assert False, "Should had failed writting arbitrary file 
+    ## Check help msg lists all supported formats
- - is based on 4 data-structures: - - - the ``networkx`` :attr:`graph` DAG, containing interchanging layers of - :class:`Operation` and :class:`DataPlaceholderNode` nodes. - They are layed out and connected by repeated calls of :meth:`add_OP`. - - The computation starts with :meth:`_solve_dag()` extracting - a *DAG subgraph* by *pruning* nodes based on given inputs and - requested outputs. - This subgraph is used to decide the `execution_plan` (see below), and - and is cached in :attr:`_cached_execution_plans` across runs with - inputs/outputs as key. - - - the :attr:`execution_plan` is the list of the operation-nodes only - from the dag (above), topologically sorted, and interspersed with - *instructions steps* needed to complete the run. - It is built by :meth:`_build_execution_plan()` based on the subgraph dag - extracted above. The *instructions* items achieve the following: - - - :class:`DeleteInstruction`: delete items from values-cache as soon as - they are not needed further down the dag, to reduce memory footprint - while computing. - - - :class:`PinInstruction`: avoid overwritting any given intermediate - inputs, and still allow their providing operations to run - (because they are needed for their other outputs). - - - the :var:`cache` local-var in :meth:`compute()`, initialized on each run - to hold the values of the given inputs, generated (aka intermediate) data, - and output values. - - - the :var:`overwrites` local-var, initialized on each run of both - ``_compute_xxx`` methods (for parallel or sequential executions), to - hold values calculated but overwritten (aka "pinned") by intermediate - input-values. + of the operations with arguments from the ``cache``. + + is based on 5 data-structures: + + :ivar graph: + A ``networkx`` DAG containing interchanging layers of + :class:`Operation` and :class:`DataPlaceholderNode` nodes. + They are layed out and connected by repeated calls of :meth:`add_OP`. 
+ + The computation starts with :meth:`_prune_dag()` extracting + a *DAG subgraph* by *pruning* its nodes based on given inputs and + requested outputs in :meth:`compute()`. + :ivar execution_dag: + It contains the nodes of the *pruned dag* from the last call to + :meth:`compile()`. This pruned subgraph is used to decide + the :attr:`execution_plan` (below). + It is cached in :attr:`_cached_compilations` across runs with + inputs/outputs as key. + + :ivar execution_plan: + It is the list of the operation-nodes only + from the dag (above), topologically sorted, and interspersed with + *instructions steps* needed to complete the run. + It is built by :meth:`_build_execution_plan()` based on the subgraph dag + extracted above. + It is cached in :attr:`_cached_compilations` across runs with + inputs/outputs as key. + + The *instructions* items achieve the following: + + - :class:`DeleteInstruction`: delete items from values-cache as soon as + they are not needed further down the dag, to reduce memory footprint + while computing. + + - :class:`PinInstruction`: avoid overwritting any given intermediate + inputs, and still allow their providing operations to run + (because they are needed for their other outputs). + + :var cache: + a local-var in :meth:`compute()`, initialized on each run + to hold the values of the given inputs, generated (intermediate) data, + and output values. + It is returned as is if no specific outputs requested; no data-eviction + happens then. + + :arg overwrites: + The optional argument given to :meth:`compute()` to colect the + intermediate *calculated* values that are overwritten by intermediate + (aka "pinned") input-values. """ @@ -119,11 +130,14 @@ def __init__(self, **kwargs): #: The list of operation-nodes & *instructions* needed to evaluate #: the given inputs & asked outputs, free memory and avoid overwritting #: any given intermediate inputs. 
- self.execution_plan = [] + self.execution_plan = () + + #: Pruned graph of the last compilation. + self.execution_dag = () #: Speed up :meth:`compile()` call and avoid a multithreading issue(?) #: that is occuring when accessing the dag in networkx. - self._cached_execution_plans = {} + self._cached_compilations = {} def add_op(self, operation): @@ -143,8 +157,9 @@ def add_op(self, operation): # assert layer is only added once to graph assert operation not in self.graph.nodes, "Operation may only be added once" - ## Invalidate old plans. - self._cached_execution_plans = {} + self.execution_dag = None + self.execution_plan = None + self._cached_compilations = {} # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: @@ -246,11 +261,11 @@ def _collect_unsatisfied_operations(self, dag, inputs): all its needs have been accounted, so we can get its satisfaction. - Their provided outputs are not linked to any data in the dag. - An operation might not have any output link when :meth:`_solve_dag()` + An operation might not have any output link when :meth:`_prune_dag()` has broken them, due to given intermediate inputs. :param dag: - the graph to consider + a graph with broken edges those arriving to existing inputs :param inputs: an iterable of the names of the input values return: @@ -288,13 +303,12 @@ def _collect_unsatisfied_operations(self, dag, inputs): return unsatisfied - def _solve_dag(self, outputs, inputs): + def _prune_dag(self, outputs, inputs): """ Determines what graph steps need to run to get to the requested - outputs from the provided inputs. Eliminates steps that come before - (in topological order) any inputs that have been provided. Also - eliminates steps that are not on a path from the provided inputs to - the requested outputs. + outputs from the provided inputs. : + - Eliminate steps that are not on a path arriving to requested outputs. 
+ - Eliminate unsatisfied operations: partial inputs or no outputs needed. :param iterable outputs: A list of desired output names. This can also be ``None``, in which @@ -305,7 +319,7 @@ def _solve_dag(self, outputs, inputs): The inputs names of all given inputs. :return: - the *execution plan* + the *pruned_dag* """ dag = self.graph @@ -341,18 +355,16 @@ def _solve_dag(self, outputs, inputs): # Prune unsatisfied operations (those with partial inputs or no outputs). unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) - pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied) + pruned_dag = dag.subgraph(self.graph.nodes - unsatisfied) - plan = self._build_execution_plan(pruned_dag, inputs, outputs) - - return plan + return pruned_dag.copy() # clone so that it is picklable def compile(self, outputs=(), inputs=()): """ Solve dag, set the :attr:`execution_plan`, and cache it. - See :meth:`_solve_dag()` for detailed description. + See :meth:`_prune_dag()` for detailed description. :param iterable outputs: A list of desired output names. This can also be ``None``, in which @@ -368,12 +380,20 @@ def compile(self, outputs=(), inputs=()): outputs = tuple(sorted(outputs)) inputs_keys = tuple(sorted(inputs)) cache_key = (inputs_keys, outputs) - if cache_key in self._cached_execution_plans: - self.execution_plan = self._cached_execution_plans[cache_key] + + if cache_key in self._cached_compilations: + dag, plan = self._cached_compilations[cache_key] else: - plan = self._solve_dag(outputs, inputs) - # save this result in a precomputed cache for future lookup - self.execution_plan = self._cached_execution_plans[cache_key] = plan + dag = self._prune_dag(outputs, inputs) + plan = self._build_execution_plan(dag, inputs, outputs) + + # Cache compilation results to speed up future runs + # with different values (but same number of inputs/outputs). 
+ self._cached_compilations[cache_key] = dag, plan + + ## TODO: Extract into Solution class + self.execution_dag = dag + self.execution_plan = plan @@ -494,7 +514,6 @@ def _execute_thread_pool_barrier_method( self._pin_data_in_cache(node, cache, inputs, overwrites) - # stop if no nodes left to schedule, exit out of the loop if len(upnext) == 0: break @@ -636,7 +655,7 @@ def _can_schedule_operation(self, op, executed_nodes): execution based on what has already been executed. """ # unordered, not iterated - dependencies = set(n for n in nx.ancestors(self.graph, op) + dependencies = set(n for n in nx.ancestors(self.execution_dag, op) if isinstance(n, Operation)) return dependencies.issubset(executed_nodes) @@ -654,7 +673,7 @@ def _can_evict_value(self, name, executed_nodes): """ data_node = self.get_data_node(name) return data_node and set( - self.graph.successors(data_node)).issubset(executed_nodes) + self.execution_dag.successors(data_node)).issubset(executed_nodes) def get_data_node(self, name): """ diff --git a/test/test_graphkit.py b/test/test_graphkit.py index cc0221d2..ba7f2a0f 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -233,6 +233,19 @@ def test_pruning_not_overrides_given_intermediate(): assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp assert overwrites == {} # unjust must have been pruned + ## Test Parallel + # + pipeline.set_execution_method("parallel") + overwrites = {} + pipeline.set_overwrites_collector(overwrites) + #assert pipeline({"a": 5, "overriden": 1, "c": 2}, ["asked"]) == filtdict(exp, "asked") + assert overwrites == {} # unjust must have been pruned + + overwrites = {} + pipeline.set_overwrites_collector(overwrites) + assert pipeline({"a": 5, "overriden": 1, "c": 2}) == exp + assert overwrites == {} # unjust must have been pruned + def test_pruning_multiouts_not_override_intermediates1(): # Test #25: v.1.2.4 overwrites intermediate data when a previous operation @@ -348,9 +361,9 @@ def 
test_pruning_with_given_intermediate_and_asked_out(): ## Test parallel # FAIL! in #26! # - # pipeline.set_execution_method("parallel") - # assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp - # assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + pipeline.set_execution_method("parallel") + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") def test_unsatisfied_operations(): # Test that operations with partial inputs are culled and not failing. @@ -395,16 +408,17 @@ def test_unsatisfied_operations_same_out(): assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") ## Test parallel + # # FAIL! in #26 + pipeline.set_execution_method("parallel") + exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} + assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp + assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") # - # pipeline.set_execution_method("parallel") - # exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} - # assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp - # assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") - - # exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} - # assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp - # assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + # FAIL! 
in #26 + exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} + assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp + assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") def test_optional(): @@ -624,6 +638,7 @@ def compute(self, inputs): outputs.append(p) return outputs + def test_backwards_compatibility(): sum_op1 = Sum( From 64e00286da1ab68002169ba8598485e54b4ff166 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 4 Oct 2019 20:36:32 +0300 Subject: [PATCH 036/167] WIP/+ExecPlan class... - SAME x4 TCs FAIL like parent. + refact: revive net.steps --> net.last_plan + BREAK: inverse the order of outputs/inputs args in net.compute() + refact: use iset() to merge. --- graphkit/base.py | 5 +- graphkit/functional.py | 17 +- graphkit/network.py | 623 ++++++++++++++++++++++------------------- test/test_graphkit.py | 16 +- 4 files changed, 357 insertions(+), 304 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 2e036468..7631be8a 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -160,11 +160,14 @@ def __init__(self, **kwargs): def _compute(self, named_inputs, outputs=None): return self.net.compute( - outputs, named_inputs, method=self._execution_method, + named_inputs, outputs, method=self._execution_method, overwrites_collector=self._overwrites_collector) def __call__(self, *args, **kwargs): return self._compute(*args, **kwargs) + + def compile(self, *args, **kwargs): + return self.net.compile(*args, **kwargs) def set_execution_method(self, method): """ diff --git a/graphkit/functional.py b/graphkit/functional.py index c113a298..b7e4bd57 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -190,22 +190,15 @@ def __call__(self, *operations): merge_set = iset() # Preseve given node order. 
for op in operations: if isinstance(op, NetworkOperation): - op.net.compile() - net_ops = filter(lambda x: isinstance(x, Operation), - op.net.execution_plan) - merge_set.update(net_ops) + plan = op.net.compile() + merge_set.update(s for s in plan.steps + if isinstance(s, Operation)) else: merge_set.add(op) operations = merge_set - def order_preserving_uniquifier(seq, seen=None): - seen = seen if seen else set() # unordered, not iterated - seen_add = seen.add - return [x for x in seq if not (x in seen or seen_add(x))] - - provides = order_preserving_uniquifier(chain(*[op.provides for op in operations])) - needs = order_preserving_uniquifier(chain(*[op.needs for op in operations]), - set(provides)) # unordered, not iterated + provides = iset(chain(*[op.provides for op in operations])) + needs = iset(chain(*[op.needs for op in operations])) - provides # Build network net = Network() diff --git a/graphkit/network.py b/graphkit/network.py index 114c945a..a9980f49 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -1,11 +1,74 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. -"""" The main implementation of the network of operations & data to compute. """ -import time +"""" +The main implementation of the network of operations & data to compute. + +The execution of network *operations* (aka computation) is splitted +in 2 phases: + +- COMPILE: prune unsatisfied nodes, sort dag topologically & solve it, and + derive the *execution steps* (see below) based on the given *inputs* + and asked *outputs*. + +- EXECUTE: sequential or parallel invocation of the underlying functions + of the operations with arguments from the ``solution``. + +Computations are based on 5 data-structures: + +:attr:`Network.graph` + A ``networkx`` graph (yet a DAG) containing interchanging layers of + :class:`Operation` and :class:`DataPlaceholderNode` nodes. 
+ They are layed out and connected by repeated calls of + :meth:`~Network.add_OP`. + + The computation starts with :meth:`~Network._prune_graph()` extracting + a *DAG subgraph* by *pruning* its nodes based on given inputs and + requested outputs in :meth:`~Network.compute()`. + +:attr:`ExecutionPlan.dag` + An directed-acyclic-graph containing the *pruned* nodes as build by + :meth:`~Network._prune_graph()`. This pruned subgraph is used to decide + the :attr:`ExecutionPlan.steps` (below). + The containing :class:`ExecutionPlan.steps` instance is cached + in :attr:`_cached_plans` across runs with inputs/outputs as key. + +:attr:`ExecutionPlan.steps` + It is the list of the operation-nodes only + from the dag (above), topologically sorted, and interspersed with + *instructions steps* needed to complete the run. + It is built by :meth:`~Network._build_execution_steps()` based on + the subgraph dag extracted above. + The containing :class:`ExecutionPlan.steps` instance is cached + in :attr:`_cached_plans` across runs with inputs/outputs as key. + + The *instructions* items achieve the following: + + - :class:`DeleteInstruction`: delete items from `solution` as soon as + they are not needed further down the dag, to reduce memory footprint + while computing. + + - :class:`PinInstruction`: avoid overwritting any given intermediate + inputs, and still allow their providing operations to run + (because they are needed for their other outputs). + +:var solution: + a local-var in :meth:`~Network.compute()`, initialized on each run + to hold the values of the given inputs, generated (intermediate) data, + and output values. + It is returned as is if no specific outputs requested; no data-eviction + happens then. + +:arg overwrites: + The optional argument given to :meth:`~Network.compute()` to colect the + intermediate *calculated* values that are overwritten by intermediate + (aka "pinned") input-values. 
+""" +import logging import os +import time import networkx as nx -from collections import defaultdict +from collections import defaultdict, namedtuple from io import StringIO from itertools import chain @@ -16,6 +79,8 @@ from .modifiers import optional +log = logging.getLogger(__name__) + class DataPlaceholderNode(str): """ Dag node naming a data-value produced or required by an operation. @@ -26,10 +91,10 @@ def __repr__(self): class DeleteInstruction(str): """ - Execution step to delete a computed value from the network's ``cache``. + Execution step to delete a computed value from the `solution`. - It is an :attr:`Network.execution_plan` step for the data-node `str` that - frees its data-value from ``cache`` after it is no longer needed, + It's a step in :attr:`ExecutionPlan.steps` for the data-node `str` that + frees its data-value from `solution` after it is no longer needed, to reduce memory footprint while computing the pipeline. """ def __repr__(self): @@ -38,12 +103,12 @@ def __repr__(self): class PinInstruction(str): """ - Execution step to replace a computed value in the ``cache`` from the inputs, + Execution step to replace a computed value in the `solution` from the inputs, and to store the computed one in the ``overwrites`` instead - (both ``cache`` & ``overwrites`` are local-vars in :meth:`Network.compute()`). + (both `solution` & ``overwrites`` are local-vars in :meth:`~Network.compute()`). - It is an :attr:`Network.execution_plan` step for the data-node `str` that + It's a step in :attr:`ExecutionPlan.steps` for the data-node `str` that ensures the corresponding intermediate input-value is not overwritten when its providing function(s) could not be pruned, because their other outputs are needed elesewhere. @@ -56,89 +121,22 @@ class Network(object): """ Assemble operations & data into a directed-acyclic-graph (DAG) to run them. 
- The execution of the contained *operations* in the dag (the computation) - is splitted in 2 phases: - - - COMPILE: prune unsatisfied nodes, sort dag topologically & solve it, and - derive the *execution plan* (see below) based on the given *inputs* - and asked *outputs*. - - - EXECUTE: sequential or parallel invocation of the underlying functions - of the operations with arguments from the ``cache``. - - is based on 5 data-structures: - - :ivar graph: - A ``networkx`` DAG containing interchanging layers of - :class:`Operation` and :class:`DataPlaceholderNode` nodes. - They are layed out and connected by repeated calls of :meth:`add_OP`. - - The computation starts with :meth:`_prune_dag()` extracting - a *DAG subgraph* by *pruning* its nodes based on given inputs and - requested outputs in :meth:`compute()`. - :ivar execution_dag: - It contains the nodes of the *pruned dag* from the last call to - :meth:`compile()`. This pruned subgraph is used to decide - the :attr:`execution_plan` (below). - It is cached in :attr:`_cached_compilations` across runs with - inputs/outputs as key. - - :ivar execution_plan: - It is the list of the operation-nodes only - from the dag (above), topologically sorted, and interspersed with - *instructions steps* needed to complete the run. - It is built by :meth:`_build_execution_plan()` based on the subgraph dag - extracted above. - It is cached in :attr:`_cached_compilations` across runs with - inputs/outputs as key. - - The *instructions* items achieve the following: - - - :class:`DeleteInstruction`: delete items from values-cache as soon as - they are not needed further down the dag, to reduce memory footprint - while computing. - - - :class:`PinInstruction`: avoid overwritting any given intermediate - inputs, and still allow their providing operations to run - (because they are needed for their other outputs). 
- - :var cache: - a local-var in :meth:`compute()`, initialized on each run - to hold the values of the given inputs, generated (intermediate) data, - and output values. - It is returned as is if no specific outputs requested; no data-eviction - happens then. - - :arg overwrites: - The optional argument given to :meth:`compute()` to colect the - intermediate *calculated* values that are overwritten by intermediate - (aka "pinned") input-values. - """ def __init__(self, **kwargs): - """ - """ - # directed graph of layer instances and data-names defining the net. self.graph = nx.DiGraph() - self._debug = kwargs.get("debug", False) - # this holds the timing information for eache layer + # this holds the timing information for each layer self.times = {} - #: The list of operation-nodes & *instructions* needed to evaluate - #: the given inputs & asked outputs, free memory and avoid overwritting - #: any given intermediate inputs. - self.execution_plan = () - - #: Pruned graph of the last compilation. - self.execution_dag = () - #: Speed up :meth:`compile()` call and avoid a multithreading issue(?) #: that is occuring when accessing the dag in networkx. - self._cached_compilations = {} + self._cached_plans = {} + #: the execution_plan of the last call to :meth:`compile()`, + #: for debugging purposes. + self._last_plan = None def add_op(self, operation): """ @@ -157,9 +155,7 @@ def add_op(self, operation): # assert layer is only added once to graph assert operation not in self.graph.nodes, "Operation may only be added once" - self.execution_dag = None - self.execution_plan = None - self._cached_compilations = {} + self._cached_plans = {} # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: @@ -171,8 +167,9 @@ def add_op(self, operation): def list_layers(self, debug=False): + ## TODO: move to ExecutionPlan # Make a generic plan. 
- plan = self._build_execution_plan(self.graph, ()) + plan = self.compile() return [n for n in plan if debug or isinstance(n, Operation)] @@ -184,7 +181,7 @@ def show_layers(self, debug=False, ret=False): else: print(s) - def _build_execution_plan(self, dag, inputs, outputs): + def _build_execution_steps(self, dag, inputs, outputs): """ Create the list of operation-nodes & *instructions* evaluating all @@ -197,15 +194,15 @@ def _build_execution_plan(self, dag, inputs, outputs): outp-names to decide whether to add (and which) del-instructions In the list :class:`DeleteInstructions` steps (DA) are inserted between - operation nodes to reduce the memory footprint of cached results. + operation nodes to reduce the memory footprint of solution. A DA is inserted whenever a *need* is not used by any other *operation* further down the DAG. - Note that since the *cache* is not reused across `compute()` invocations, + Note that since the `solutions` are not shared across `compute()` calls, any memory-reductions are for as long as a single computation runs. """ - plan = [] + steps = [] # create an execution order such that each layer's needs are provided. ordered_nodes = iset(nx.topological_sort(dag)) @@ -218,12 +215,12 @@ def _build_execution_plan(self, dag, inputs, outputs): if node in inputs and dag.pred[node]: # Command pinning only when there is another operation # generating this data as output. - plan.append(PinInstruction(node)) + steps.append(PinInstruction(node)) elif isinstance(node, Operation): - plan.append(node) + steps.append(node) - # Keep all values in cache if not specific outputs asked. + # Keep all values in solution if not specific outputs asked. if not outputs: continue @@ -231,8 +228,7 @@ def _build_execution_plan(self, dag, inputs, outputs): # predecessor may be deleted if it is a data placeholder that # is no longer needed by future Operations. 
for need in self.graph.pred[node]: - if self._debug: - print("checking if node %s can be deleted" % need) + log.debug("checking if node %s can be deleted", need) for future_node in ordered_nodes[i+1:]: if ( isinstance(future_node, Operation) @@ -241,14 +237,13 @@ def _build_execution_plan(self, dag, inputs, outputs): break else: if need not in outputs: - if self._debug: - print(" adding delete instruction for %s" % need) - plan.append(DeleteInstruction(need)) + log.debug(" adding delete instruction for %s", need) + steps.append(DeleteInstruction(need)) else: raise AssertionError("Unrecognized network graph node %r" % node) - return plan + return steps def _collect_unsatisfied_operations(self, dag, inputs): """ @@ -261,7 +256,7 @@ def _collect_unsatisfied_operations(self, dag, inputs): all its needs have been accounted, so we can get its satisfaction. - Their provided outputs are not linked to any data in the dag. - An operation might not have any output link when :meth:`_prune_dag()` + An operation might not have any output link when :meth:`_prune_graph()` has broken them, due to given intermediate inputs. :param dag: @@ -303,7 +298,7 @@ def _collect_unsatisfied_operations(self, dag, inputs): return unsatisfied - def _prune_dag(self, outputs, inputs): + def _prune_graph(self, outputs, inputs): """ Determines what graph steps need to run to get to the requested outputs from the provided inputs. : @@ -341,8 +336,10 @@ def _prune_dag(self, outputs, inputs): # (unless they are also used elsewhere). # To discover which ones to prune, we break their incoming edges # and they will drop out while collecting ancestors from the outputs. 
+ broken_edges = set() # unordered, not iterated for given in graph_inputs: - broken_dag.remove_edges_from(list(broken_dag.in_edges(given))) + broken_edges.update(broken_dag.in_edges(given)) + broken_dag.remove_edges_from(broken_edges) if outputs: # If caller requested specific outputs, we can prune any @@ -355,62 +352,75 @@ def _prune_dag(self, outputs, inputs): # Prune unsatisfied operations (those with partial inputs or no outputs). unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) - pruned_dag = dag.subgraph(self.graph.nodes - unsatisfied) - - return pruned_dag.copy() # clone so that it is picklable + # Clone it so that it is picklable. + pruned_dag = dag.subgraph(self.graph.nodes - unsatisfied).copy() + return pruned_dag, tuple(broken_edges) - def compile(self, outputs=(), inputs=()): + def compile(self, inputs=(), outputs=()): """ - Solve dag, set the :attr:`execution_plan`, and cache it. + Create or get from cache an execution-plan for the given inputs/outputs. - See :meth:`_prune_dag()` for detailed description. + See :meth:`_prune_graph()` and :meth:`_build_execution_steps()` + for detailed description. - :param iterable outputs: - A list of desired output names. This can also be ``None``, in which - case the necessary steps are all graph nodes that are reachable - from one of the provided inputs. - - :param dict inputs: - The input names of all given inputs. - """ + :param inputs: + An iterable with the names of all the given inputs. - # return steps if it has already been computed before for this set of inputs and outputs - if outputs is not None and not isinstance(outputs, str): - outputs = tuple(sorted(outputs)) - inputs_keys = tuple(sorted(inputs)) - cache_key = (inputs_keys, outputs) + :param outputs: + (optional) An iterable or the name of the output name(s). + If missing, requested outputs assumed all graph reachable nodes + from one of the given inputs. 
- if cache_key in self._cached_compilations: - dag, plan = self._cached_compilations[cache_key] + :return: + the cached or fresh new execution-plan + """ + # outputs must be iterable + if not outputs: + outputs = () + elif isinstance(outputs, str): + outputs = (outputs, ) + + # Make a stable cache-key + cache_key = (tuple(sorted(inputs)), tuple(sorted(outputs))) + if cache_key in self._cached_plans: + # An execution plan has been compiled before + # for the same inputs & outputs. + plan = self._cached_plans[cache_key] else: - dag = self._prune_dag(outputs, inputs) - plan = self._build_execution_plan(dag, inputs, outputs) + # Build a new execution plan for the given inputs & outputs. + # + pruned_dag, broken_edges = self._prune_graph(outputs, inputs) + steps = self._build_execution_steps(pruned_dag, inputs, outputs) + plan = ExecutionPlan( + self, + tuple(inputs), + outputs, + pruned_dag, + tuple(broken_edges), + tuple(steps), + ) # Cache compilation results to speed up future runs # with different values (but same number of inputs/outputs). - self._cached_compilations[cache_key] = dag, plan - - ## TODO: Extract into Solution class - self.execution_dag = dag - self.execution_plan = plan - + self._cached_plans[cache_key] = plan + return plan def compute( - self, outputs, named_inputs, method=None, overwrites_collector=None): + self, named_inputs, outputs, method=None, overwrites_collector=None): """ Solve & execute the graph, sequentially or parallel. - :param list output: The names of the data node you'd like to have returned - once all necessary computations are complete. - If you set this variable to ``None``, all - data nodes will be kept and returned at runtime. + :param dict named_inputs: + A dict of key/value pairs where the keys represent the data nodes + you want to populate, and the values are the concrete values you + want to set for the data node. 
- :param dict named_inputs: A dict of key/value pairs where the keys - represent the data nodes you want to populate, - and the values are the concrete values you - want to set for the data node. + :param list output: + once all necessary computations are complete. + If you set this variable to ``None``, all data nodes will be kept + and returned at runtime. :param method: if ``"parallel"``, launches multi-threading. @@ -427,144 +437,22 @@ def compute( assert isinstance(outputs, (list, tuple)) or outputs is None,\ "The outputs argument must be a list" - # start with fresh data cache & overwrites - cache = named_inputs.copy() - - # Build and set :attr:`execution_plan`. - self.compile(outputs, named_inputs.keys()) - - # choose a method of execution - if method == "parallel": - self._execute_thread_pool_barrier_method( - cache, overwrites_collector, named_inputs) - else: - self._execute_sequential_method( - cache, overwrites_collector, named_inputs) - - if not outputs: - # Return the whole cache as output, including input and - # intermediate data nodes. - result = cache - - else: - # Filter outputs to just return what's needed. - # Note: list comprehensions exist in python 2.7+ - result = dict(i for i in cache.items() if i[0] in outputs) - - return result - - - def _pin_data_in_cache(self, value_name, cache, inputs, overwrites): - value_name = str(value_name) - if overwrites is not None: - overwrites[value_name] = cache[value_name] - cache[value_name] = inputs[value_name] - - - def _execute_thread_pool_barrier_method( - self, cache, overwrites, inputs, thread_pool_size=10 - ): - """ - This method runs the graph using a parallel pool of thread executors. - You may achieve lower total latency if your graph is sufficiently - sub divided into operations using this method. 
- """ - from multiprocessing.dummy import Pool - - # if we have not already created a thread_pool, create one - if not hasattr(self, "_thread_pool"): - self._thread_pool = Pool(thread_pool_size) - pool = self._thread_pool - - - # this keeps track of all nodes that have already executed - executed_nodes = set() # unordered, not iterated - - # with each loop iteration, we determine a set of operations that can be - # scheduled, then schedule them onto a thread pool, then collect their - # results onto a memory cache for use upon the next iteration. - while True: - - # the upnext list contains a list of operations for scheduling - # in the current round of scheduling - upnext = [] - for node in self.execution_plan: - if ( - isinstance(node, Operation) - and self._can_schedule_operation(node, executed_nodes) - and node not in executed_nodes - ): - upnext.append(node) - elif isinstance(node, DeleteInstruction): - # Only delete if all successors for the data node - # have been executed. - # An optional need may not have a value in the cache. - if ( - node in cache - and self._can_evict_value(node, executed_nodes) - ): - if self._debug: - print("removing data '%s' from cache." % node) - del cache[node] - elif isinstance(node, PinInstruction): - # Always and repeatedely pin the value, even if not all - # providers of the data have executed. - # An optional need may not have a value in the cache. - if node in cache: - self._pin_data_in_cache(node, cache, inputs, overwrites) + # Build the execution plan. + plan = self.compile(named_inputs.keys(), outputs) + # start with fresh data solution. 
+ solution = dict(named_inputs) - # stop if no nodes left to schedule, exit out of the loop - if len(upnext) == 0: - break + plan.execute(solution, overwrites_collector, method) - done_iterator = pool.imap_unordered( - lambda op: (op,op._compute(cache)), - upnext) - for op, result in done_iterator: - cache.update(result) - executed_nodes.add(op) - - - def _execute_sequential_method(self, cache, overwrites, inputs): - """ - This method runs the graph one operation at a time in a single thread - """ - self.times = {} - for step in self.execution_plan: - - if isinstance(step, Operation): - - if self._debug: - print("-"*32) - print("executing step: %s" % step.name) - - # time execution... - t0 = time.time() - - # compute layer outputs - layer_outputs = step._compute(cache) - - # add outputs to cache - cache.update(layer_outputs) - - # record execution time - t_complete = round(time.time() - t0, 5) - self.times[step.name] = t_complete - if self._debug: - print("step completion time: %s" % t_complete) - - elif isinstance(step, DeleteInstruction): - # Cache value may be missing if it is optional. - if step in cache: - if self._debug: - print("removing data '%s' from cache." % step) - del cache[step] + if outputs: + # Filter outputs to just return what's requested. + # Otherwise, eturn the whole solution as output, + # including input and intermediate data nodes. + # TODO: assert no other outputs exists due to DelInstructs. + solution = dict(i for i in solution.items() if i[0] in outputs) - elif isinstance(step, PinInstruction): - self._pin_data_in_cache(step, cache, inputs, overwrites) - else: - raise AssertionError("Unrecognized instruction.%r" % step) + return solution def plot(self, filename=None, show=False): @@ -640,6 +528,46 @@ def get_node_name(a): return g +class ExecutionPlan(namedtuple("_ExecPlan", + "net inputs outputs dag broken_edges steps")): + """ + The result of the network's compilation phase. 
+ + Note the execution plan's attributes are on purpose immutable tuples. + + :ivar net: + The parent :class:`Network` + + :ivar inputs: + A tuple with the names of the given inputs used to construct the plan. + + :ivar outputs: + A (possibly empy) tuple with the names of the requested outputs + used to construct the plan. + + :ivar dag: + The regular (not broken) *pruned* subgraph of net-graph. + + :ivar broken_edges: + Tuple of broken incoming edges to given data. + + :ivar steps: + The tuple of operation-nodes & *instructions* needed to evaluate + the given inputs & asked outputs, free memory and avoid overwritting + any given intermediate inputs. + """ + @property + def broken_dag(self): + return nx.restricted_view(self.dag, nodes=(), edges=self.broken_edges) + + def get_data_node(self, name): + """ + Retuen the data node from a graph using its name, or None. + """ + node = self.dag.nodes[name] + if isinstance(node, DataPlaceholderNode): + return node + def _can_schedule_operation(self, op, executed_nodes): """ Determines if a Operation is ready to be scheduled for execution @@ -654,15 +582,15 @@ def _can_schedule_operation(self, op, executed_nodes): A boolean indicating whether the operation may be scheduled for execution based on what has already been executed. """ - # unordered, not iterated - dependencies = set(n for n in nx.ancestors(self.execution_dag, op) + # Use `broken_dag` to allow executing operations after given inputs + # regardless of whether their producers have yet to run. + dependencies = set(n for n in nx.ancestors(self.broken_dag, op) if isinstance(n, Operation)) return dependencies.issubset(executed_nodes) - def _can_evict_value(self, name, executed_nodes): """ - Determines if a DataPlaceholderNode is ready to be deleted from cache. + Determines if a DataPlaceholderNode is ready to be deleted from solution. 
:param name: The name of the data node to check @@ -672,13 +600,142 @@ def _can_evict_value(self, name, executed_nodes): A boolean indicating whether the data node can be deleted or not. """ data_node = self.get_data_node(name) + # Use `broken_dag` not to block a successor waiting for this data, + # since in any case will use a given input, not some pipe of this data. return data_node and set( - self.execution_dag.successors(data_node)).issubset(executed_nodes) + self.broken_dag.successors(data_node)).issubset(executed_nodes) - def get_data_node(self, name): + def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): + value_name = str(value_name) + if overwrites is not None: + overwrites[value_name] = solution[value_name] + solution[value_name] = inputs[value_name] + + def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, + thread_pool_size=10 + ): """ - Retuen the data node from a graph using its name, or None. + This method runs the graph using a parallel pool of thread executors. + You may achieve lower total latency if your graph is sufficiently + sub divided into operations using this method. """ - node = self.graph.nodes[name] - if isinstance(node, DataPlaceholderNode): - return node + from multiprocessing.dummy import Pool + + # if we have not already created a thread_pool, create one + if not hasattr(self.net, "_thread_pool"): + self.net._thread_pool = Pool(thread_pool_size) + pool = self.net._thread_pool + + + # this keeps track of all nodes that have already executed + executed_nodes = set() # unordered, not iterated + + # with each loop iteration, we determine a set of operations that can be + # scheduled, then schedule them onto a thread pool, then collect their + # results onto a memory solution for use upon the next iteration. 
+ while True: + + # the upnext list contains a list of operations for scheduling + # in the current round of scheduling + upnext = [] + for node in self.steps: + if ( + isinstance(node, Operation) + and self._can_schedule_operation(node, executed_nodes) + and node not in executed_nodes + ): + upnext.append(node) + elif isinstance(node, DeleteInstruction): + # Only delete if all successors for the data node + # have been executed. + # An optional need may not have a value in the solution. + if ( + node in solution + and self._can_evict_value(node, executed_nodes) + ): + log.debug("removing data '%s' from solution.", node) + del solution[node] + elif isinstance(node, PinInstruction): + # Always and repeatedely pin the value, even if not all + # providers of the data have executed. + # An optional need may not have a value in the solution. + if node in solution: + self._pin_data_in_solution( + node, solution, inputs, overwrites) + + + # stop if no nodes left to schedule, exit out of the loop + if len(upnext) == 0: + break + + done_iterator = pool.imap_unordered( + lambda op: (op,op._compute(solution)), + upnext) + for op, result in done_iterator: + solution.update(result) + executed_nodes.add(op) + + + def _execute_sequential_method(self, inputs, solution, overwrites): + """ + This method runs the graph one operation at a time in a single thread + """ + self.times = {} + for step in self.steps: + + if isinstance(step, Operation): + + log.debug("%sexecuting step: %s", "-"*32, step.name) + + # time execution... + t0 = time.time() + + # compute layer outputs + layer_outputs = step._compute(solution) + + # add outputs to solution + solution.update(layer_outputs) + + # record execution time + t_complete = round(time.time() - t0, 5) + self.times[step.name] = t_complete + log.debug("step completion time: %s", t_complete) + + elif isinstance(step, DeleteInstruction): + # Cache value may be missing if it is optional. 
+ if step in solution: + log.debug("removing data '%s' from solution.", step) + del solution[step] + + elif isinstance(step, PinInstruction): + self._pin_data_in_solution(step, solution, inputs, overwrites) + else: + raise AssertionError("Unrecognized instruction.%r" % step) + + def execute(self, solution, overwrites=None, method=None): + """ + :param solution: + a mutable maping to collect the results and that must contain also + the given input values for at least the compulsory inputs that + were specified when the plan was built (but cannot enforce that!). + + :param overwrites: + (optional) a mutable dict to collect calculated-but-discarded values + because they were "pinned" by input vaules. + If missing, the overwrites values are simply discarded. + """ + + # choose a method of execution + executor = (self._execute_thread_pool_barrier_method + if method == "parallel" else + self._execute_sequential_method) + + # clone and keep orignal inputs in solution intact + executor(dict(solution), solution, overwrites) + + # return it, but caller can also see the results in `solution` dict. 
+ return solution + +# TODO: maybe class Solution(object): +# values = {} +# overwrites = None diff --git a/test/test_graphkit.py b/test/test_graphkit.py index ba7f2a0f..0da8a80f 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -77,11 +77,11 @@ def pow_op1(a, exponent=2): # Running the network # - # get all outputs - pprint(net({'a': 1, 'b': 2})) + # # get all outputs + # pprint(net({'a': 1, 'b': 2})) - # get specific outputs - pprint(net({'a': 1, 'b': 2}, outputs=["sum_ab_times_b"])) + # # get specific outputs + # pprint(net({'a': 1, 'b': 2}, outputs=["sum_ab_times_b"])) # start with inputs already computed pprint(net({"sum_ab": 1, "b": 2}, outputs=["sum_ab_times_b"])) @@ -479,19 +479,19 @@ def count_deletions(steps): exp = inp.copy(); exp.update({"aa": 2, "ab": 5, "asked": 7}) res = pipeline(inp) assert res == exp # ok - steps11 = pipeline.net.execution_plan + steps11 = pipeline.compile(inp).steps res = pipeline(inp, outputs=["asked"]) assert res == filtdict(exp, "asked") # ok - steps12 = pipeline.net.execution_plan + steps12 = pipeline.compile(inp, ["asked"]).steps inp = {"a": 2} exp = inp.copy(); exp.update({"aa": 2, "asked": 12}) res = pipeline(inp) assert res == exp # ok - steps21 = pipeline.net.execution_plan + steps21 = pipeline.compile(inp).steps res = pipeline(inp, outputs=["asked"]) assert res == filtdict(exp, "asked") # ok - steps22 = pipeline.net.execution_plan + steps22 = pipeline.compile(inp, ["asked"]).steps # When no outs, no del-instructs. 
assert steps11 != steps12 From 4e55b30310d04660c53553bb9e2e87669271b9bd Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 3 Oct 2019 20:54:53 +0300 Subject: [PATCH 037/167] enh(build,ci): use pytest in travis --- .travis.yml | 4 ++-- setup.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index d8657a8f..bb7d875e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,13 +8,13 @@ python: install: - pip install Sphinx sphinx_rtd_theme codecov packaging - "python -c $'import os, packaging.version as version\\nv = version.parse(os.environ.get(\"TRAVIS_TAG\", \"1.0\")).public\\nwith open(\"VERSION\", \"w\") as f: f.write(v)'" - - python setup.py install + - pip install -e .[test] - cd docs - make clean html - cd .. script: - - python setup.py nosetests --with-coverage --cover-package=graphkit + - pytest -v --cov=graphkit deploy: provider: pypi diff --git a/setup.py b/setup.py index 51d606fc..c8d231a5 100644 --- a/setup.py +++ b/setup.py @@ -34,9 +34,9 @@ ], extras_require={ 'plot': ['pydot', 'matplotlib'], - 'test': ['pydot', 'matplotlib', 'pytest'], + 'test': ['pydot', 'matplotlib', 'pytest', "pytest-cov"], }, - tests_require=['pytest'], + tests_require=['pytest', "pytest-cov"], license='Apache-2.0', keywords=['graph', 'computation graph', 'DAG', 'directed acyclical graph'], classifiers=[ From 47b50f6bcd9734d20314afaa3f829739137e08e6 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 02:03:36 +0300 Subject: [PATCH 038/167] fix(plot): NetOp did not return pydot instance --- graphkit/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/base.py b/graphkit/base.py index 1c04e8d5..631c66ab 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -172,7 +172,7 @@ def set_execution_method(self, method): self._execution_method = method def plot(self, filename=None, show=False): - self.net.plot(filename=filename, show=show) + return self.net.plot(filename=filename, 
show=show) def __getstate__(self): state = Operation.__getstate__(self) From b1d02a1918da028fc163a1cc132280e2468f8560 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 02:10:22 +0300 Subject: [PATCH 039/167] refact(plot): extract plot function out of Network class... to use it also on stary DAGs. Keep delegation. --- graphkit/network.py | 158 ++++++++++++++++++++++-------------------- test/test_graphkit.py | 4 +- 2 files changed, 86 insertions(+), 76 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index f0c7444a..3f9cbdaa 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -375,84 +375,14 @@ def _compute_sequential_method(self, named_inputs, outputs): return {k: cache[k] for k in iter(cache) if k in outputs} - @staticmethod - def supported_plot_writers(): - return { - ".png": lambda gplot: gplot.create_png(), - ".dot": lambda gplot: gplot.to_string(), - ".jpg": lambda gplot: gplot.create_jpeg(), - ".jpeg": lambda gplot: gplot.create_jpeg(), - ".pdf": lambda gplot: gplot.create_pdf(), - ".svg": lambda gplot: gplot.create_svg(), - } - def plot(self, filename=None, show=False): """ - Plot the graph. - - params: - :param str filename: - Write the output to a png, pdf, or graphviz dot file. The extension - controls the output format. - - :param boolean show: - If this is set to True, use matplotlib to show the graph diagram - (Default: False) - - :returns: - An instance of the pydot graph + Plot a *Graphviz* graph and return it, if no other argument provided. 
+ Supported arguments: filename, show + See :func:`network.plot_graph()` """ - import pydot - import matplotlib.pyplot as plt - import matplotlib.image as mpimg - - assert self.graph is not None - - def get_node_name(a): - if isinstance(a, DataPlaceholderNode): - return a - return a.name - - g = pydot.Dot(graph_type="digraph") - - # draw nodes - for nx_node in self.graph.nodes(): - if isinstance(nx_node, DataPlaceholderNode): - node = pydot.Node(name=nx_node, shape="rect") - else: - node = pydot.Node(name=nx_node.name, shape="circle") - g.add_node(node) - - # draw edges - for src, dst in self.graph.edges(): - src_name = get_node_name(src) - dst_name = get_node_name(dst) - edge = pydot.Edge(src=src_name, dst=dst_name) - g.add_edge(edge) - - # save plot - if filename: - _basename, ext = os.path.splitext(filename) - writers = Network.supported_plot_writers() - plot_writer = Network.supported_plot_writers().get(ext.lower()) - if not plot_writer: - raise ValueError( - "Unknown file format for saving graph: %s" - " File extensions must be one of: %s" - % (ext, ' '.join(writers))) - with open(filename, "wb") as fh: - fh.write(plot_writer(g)) - - # display graph via matplotlib - if show: - png = g.create_png() - sio = StringIO(png) - img = mpimg.imread(sio) - plt.imshow(img, aspect="equal") - plt.show() - - return g + return plot_graph(self.graph, filename=filename, show=show) def ready_to_schedule_operation(op, has_executed, graph): @@ -501,3 +431,83 @@ def get_data_node(name, graph): if node == name and isinstance(node, DataPlaceholderNode): return node return None + + +def supported_plot_writers(): + return { + ".png": lambda gplot: gplot.create_png(), + ".dot": lambda gplot: gplot.to_string(), + ".jpg": lambda gplot: gplot.create_jpeg(), + ".jpeg": lambda gplot: gplot.create_jpeg(), + ".pdf": lambda gplot: gplot.create_pdf(), + ".svg": lambda gplot: gplot.create_svg(), + } + + +def plot_graph(graph, filename=None, show=False): + """ + Plot a *Graphviz* graph and return 
it, if no other argument provided. + + :param graph: + what to plot + :param str filename: + Write the output to a png, pdf, or graphviz dot file. The extension + controls the output format. + :param boolean show: + If this is set to True, use matplotlib to show the graph diagram + (Default: False) + + :returns: + An instance of the pydot graph + + """ + import pydot + import matplotlib.pyplot as plt + import matplotlib.image as mpimg + + assert graph is not None + + def get_node_name(a): + if isinstance(a, DataPlaceholderNode): + return a + return a.name + + g = pydot.Dot(graph_type="digraph") + + # draw nodes + for nx_node in graph.nodes(): + if isinstance(nx_node, DataPlaceholderNode): + node = pydot.Node(name=nx_node, shape="rect") + else: + node = pydot.Node(name=nx_node.name, shape="circle") + g.add_node(node) + + # draw edges + for src, dst in graph.edges(): + src_name = get_node_name(src) + dst_name = get_node_name(dst) + edge = pydot.Edge(src=src_name, dst=dst_name) + g.add_edge(edge) + + # save plot + if filename: + _basename, ext = os.path.splitext(filename) + writers = Network.supported_plot_writers() + plot_writer = Network.supported_plot_writers().get(ext.lower()) + if not plot_writer: + raise ValueError( + "Unknown file format for saving graph: %s" + " File extensions must be one of: %s" + % (ext, ' '.join(writers))) + with open(filename, "wb") as fh: + fh.write(plot_writer(g)) + + # display graph via matplotlib + if show: + png = g.create_png() + sio = StringIO(png) + img = mpimg.imread(sio) + plt.imshow(img, aspect="equal") + plt.show() + + return g diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bdd0ab37..bb08cf15 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -327,7 +327,7 @@ def test_plotting(): sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) - for ext in network.Network.supported_plot_writers(): + for ext in 
network.supported_plot_writers(): tdir = tempfile.mkdtemp(suffix=ext) png_file = osp.join(tdir, "workflow.png") net1.net.plot(png_file) @@ -342,7 +342,7 @@ def test_plotting(): assert "Unknown file format" in str(ex) ## Check help msg lists all siupported formats - for ext in network.Network.supported_plot_writers(): + for ext in network.supported_plot_writers(): assert ext in str(ex) From c11af2ae384340b675e8d5aec0b48c28250f8757 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 02:31:33 +0300 Subject: [PATCH 040/167] fix(plot): matplotlib plot was failing in PY3 due IO io misuse --- graphkit/network.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 3f9cbdaa..265b048f 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -1,8 +1,9 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
-import time +import io import os +import time import networkx as nx from io import StringIO @@ -505,7 +506,7 @@ def get_node_name(a): # display graph via matplotlib if show: png = g.create_png() - sio = StringIO(png) + sio = io.BytesIO(png) img = mpimg.imread(sio) plt.imshow(img, aspect="equal") plt.show() From 344490be8cafcc466674bc986fa7118bd68c99d7 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 03:04:06 +0300 Subject: [PATCH 041/167] FEAT(plot): overlay Execution STEPS on diagrams --- graphkit/network.py | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 265b048f..035facd5 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -6,7 +6,6 @@ import time import networkx as nx -from io import StringIO from .base import Operation @@ -383,7 +382,7 @@ def plot(self, filename=None, show=False): Supported arguments: filename, show See :func:`network.plot_graph()` """ - return plot_graph(self.graph, filename=filename, show=show) + return plot_graph(self.graph, filename, show, self.steps) def ready_to_schedule_operation(op, has_executed, graph): @@ -445,9 +444,9 @@ def supported_plot_writers(): } -def plot_graph(graph, filename=None, show=False): +def plot_graph(graph, filename=None, show=False, steps=None): """ - Plot a *Graphviz* graph and return it, if no other argument provided. + Plot a *Graphviz* graph/steps and return it, if no other argument provided. 
:param graph: what to plot @@ -457,6 +456,8 @@ def plot_graph(graph, filename=None, show=False): :param boolean show: If this is set to True, use matplotlib to show the graph diagram (Default: False) + :param steps: + a list of nodes & instructions to overlay on the diagram :returns: An instance of the pydot graph @@ -469,18 +470,23 @@ def plot_graph(graph, filename=None, show=False): assert graph is not None def get_node_name(a): - if isinstance(a, DataPlaceholderNode): - return a - return a.name + if isinstance(a, Operation): + return a.name + return a g = pydot.Dot(graph_type="digraph") # draw nodes - for nx_node in graph.nodes(): + for nx_node in graph.nodes: + kw = {} if isinstance(nx_node, DataPlaceholderNode): - node = pydot.Node(name=nx_node, shape="rect") + if nx_node in steps: + kw = {'color': 'red', 'style': 'bold'} + node = pydot.Node(name=nx_node, shape="rect", **kw) else: - node = pydot.Node(name=nx_node.name, shape="circle") + if nx_node in steps: + kw = {'style': 'bold'} + node = pydot.Node(name=nx_node.name, shape="circle", **kw) g.add_node(node) # draw edges @@ -490,6 +496,18 @@ def get_node_name(a): edge = pydot.Edge(src=src_name, dst=dst_name) g.add_edge(edge) + # draw steps sequence + if steps and len(steps) > 1: + it1 = iter(steps) + it2 = iter(steps); next(it2) + for i, (src, dst) in enumerate(zip(it1, it2), 1): + src_name = get_node_name(src) + dst_name = get_node_name(dst) + edge = pydot.Edge( + src=src_name, dst=dst_name, label=str(i), style="dotted", + penwidth='2') + g.add_edge(edge) + # save plot if filename: _basename, ext = os.path.splitext(filename) From 23ef81e3cf08af2074a14195c101da814e408097 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 03:44:01 +0300 Subject: [PATCH 042/167] ENH(plot): +inputs, +outputs, +solution modify plotting (see #13 for an example): --- graphkit/base.py | 5 +++-- graphkit/network.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 9 deletions(-) 
diff --git a/graphkit/base.py b/graphkit/base.py index 631c66ab..608a96d0 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -171,8 +171,9 @@ def set_execution_method(self, method): assert method in options self._execution_method = method - def plot(self, filename=None, show=False): - return self.net.plot(filename=filename, show=show) + def plot(self, filename=None, show=False, + inputs=None, outputs=None, solution=None): + return self.net.plot(filename, show, inputs, outputs, solution) def __getstate__(self): state = Operation.__getstate__(self) diff --git a/graphkit/network.py b/graphkit/network.py index 035facd5..9fdf71c2 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -375,14 +375,15 @@ def _compute_sequential_method(self, named_inputs, outputs): return {k: cache[k] for k in iter(cache) if k in outputs} - def plot(self, filename=None, show=False): + def plot(self, filename=None, show=False, + inputs=None, outputs=None, solution=None): """ Plot a *Graphviz* graph and return it, if no other argument provided. - Supported arguments: filename, show See :func:`network.plot_graph()` """ - return plot_graph(self.graph, filename, show, self.steps) + return plot_graph(self.graph, filename, show, self.steps, + inputs, outputs, solution) def ready_to_schedule_operation(op, has_executed, graph): @@ -444,7 +445,8 @@ def supported_plot_writers(): } -def plot_graph(graph, filename=None, show=False, steps=None): +def plot_graph(graph, filename=None, show=False, steps=None, + inputs=None, outputs=None, solution=None): """ Plot a *Graphviz* graph/steps and return it, if no other argument provided. 
@@ -458,6 +460,13 @@ def plot_graph(graph, filename=None, show=False, steps=None): (Default: False) :param steps: a list of nodes & instructions to overlay on the diagram + :param inputs: + an optional list, any nodes in there are plotted as `"house" + `_ + :param outputs: + an optional list, any nodes in there are plotted as `"invhouse" + :param outputs: + an optional dict, any values in there are included in the node-name :returns: An instance of the pydot graph @@ -480,9 +489,25 @@ def get_node_name(a): for nx_node in graph.nodes: kw = {} if isinstance(nx_node, DataPlaceholderNode): + # Only DeleteInstructions data in steps. if nx_node in steps: kw = {'color': 'red', 'style': 'bold'} - node = pydot.Node(name=nx_node, shape="rect", **kw) + + # SHAPE change if in inputs/outputs. + shape="rect" + if inputs and nx_node in inputs: + shape="invhouse" + if outputs and nx_node in outputs: + if inputs and nx_node in inputs: + shape="polygon" + else: + shape="house" + + # LABEL change from solution. 
+ name = str(nx_node) + if solution and nx_node in solution: + name = "%s: %s" % (nx_node, solution.get(nx_node)) + node = pydot.Node(name=nx_node, label=name, shape=shape, **kw) else: if nx_node in steps: kw = {'style': 'bold'} @@ -511,8 +536,8 @@ def get_node_name(a): # save plot if filename: _basename, ext = os.path.splitext(filename) - writers = Network.supported_plot_writers() - plot_writer = Network.supported_plot_writers().get(ext.lower()) + writers = supported_plot_writers() + plot_writer = supported_plot_writers().get(ext.lower()) if not plot_writer: raise ValueError( "Unknown file format for saving graph: %s" From 4e8601ce9c4a9372ed398cd6402c2a83bb059f25 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 12:43:10 +0300 Subject: [PATCH 043/167] refact(plot.TC): move plot tests to early beggining --- test/test_graphkit.py | 50 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index bb08cf15..dff4e655 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -107,6 +107,31 @@ def test_network_deep_merge(): pprint(net3({'a': 1, 'b': 2, 'c': 4})) +def test_plotting(): + sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) + sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) + sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) + net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) + + for ext in network.supported_plot_writers(): + tdir = tempfile.mkdtemp(suffix=ext) + png_file = osp.join(tdir, "workflow.png") + net1.net.plot(png_file) + try: + assert osp.exists(png_file) + finally: + shutil.rmtree(tdir, ignore_errors=True) + try: + net1.net.plot('bad.format') + assert False, "Should had failed writting arbitrary file format!" 
+ except ValueError as ex: + assert "Unknown file format" in str(ex) + + ## Check help msg lists all siupported formats + for ext in network.supported_plot_writers(): + assert ext in str(ex) + + def test_input_based_pruning(): # Tests to make sure we don't need to pass graph inputs if we're provided # with data further downstream in the graph as an input. @@ -321,31 +346,6 @@ def infer(i): pool.close() -def test_plotting(): - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) - net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) - - for ext in network.supported_plot_writers(): - tdir = tempfile.mkdtemp(suffix=ext) - png_file = osp.join(tdir, "workflow.png") - net1.net.plot(png_file) - try: - assert osp.exists(png_file) - finally: - shutil.rmtree(tdir, ignore_errors=True) - try: - net1.net.plot('bad.format') - assert False, "Should had failed writting arbitrary file format!" 
- except ValueError as ex: - assert "Unknown file format" in str(ex) - - ## Check help msg lists all siupported formats - for ext in network.supported_plot_writers(): - assert ext in str(ex) - - #################################### # Backwards compatibility #################################### From 834a8b0784d3ac58df2e4530b76cbbc4681a1582 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 12:44:33 +0300 Subject: [PATCH 044/167] doc(plot): tell supported formats in doctest, +TC --- graphkit/network.py | 4 ++-- test/test_graphkit.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 9fdf71c2..987629f3 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -453,8 +453,8 @@ def plot_graph(graph, filename=None, show=False, steps=None, :param graph: what to plot :param str filename: - Write the output to a png, pdf, or graphviz dot file. The extension - controls the output format. + Write the output to a file. + The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` :param boolean show: If this is set to True, use matplotlib to show the graph diagram (Default: False) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index dff4e655..826719d6 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -132,6 +132,11 @@ def test_plotting(): assert ext in str(ex) +def test_plotting_docstring(): + for ext in network.supported_plot_writers(): + assert ext in network.plot_graph.__doc__ + + def test_input_based_pruning(): # Tests to make sure we don't need to pass graph inputs if we're provided # with data further downstream in the graph as an input. 
From c2e28a405a46ee08a4ae5f20d13811f01759dcaf Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 12:46:10 +0300 Subject: [PATCH 045/167] doc(plot): add legend & example; docstring in netop.plot() --- graphkit/base.py | 5 +++++ graphkit/network.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/graphkit/base.py b/graphkit/base.py index 608a96d0..512a95bc 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -173,6 +173,11 @@ def set_execution_method(self, method): def plot(self, filename=None, show=False, inputs=None, outputs=None, solution=None): + """ + Plot a *Graphviz* graph and return it, if no other argument provided. + + See :func:`network.plot_graph()` for arguments, legend, and example code. + """ return self.net.plot(filename, show, inputs, outputs, solution) def __getstate__(self): diff --git a/graphkit/network.py b/graphkit/network.py index 987629f3..1f7d81e9 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -450,6 +450,23 @@ def plot_graph(graph, filename=None, show=False, steps=None, """ Plot a *Graphviz* graph/steps and return it, if no other argument provided. + Legend: + + NODES: + + - **circle**: function + - **house**: input (given) + - **inversed-house**: output (asked) + - **polygon**: given both as input & asked as output (what?) + - **square**: intermediate data (neither given nor asked) + - **red frame**: delete-instruction (to free up memory) + + ARROWS + + - **solid black arrows**: dependencies (target ``need`` source, + sources ``provide`` target) + - **green-dotted arrows**: execution steps labeled in succession + :param graph: what to plot :param str filename: @@ -471,6 +488,18 @@ def plot_graph(graph, filename=None, show=False, steps=None, :returns: An instance of the pydot graph + **Example:** + + >>> netop = compose(name="netop")( + ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), + ... 
operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), + ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), + ... ) + + >>> inputs = {'a': 1, 'b1': 2} + >>> solution=netop(inputs) + >>> netop.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); + """ import pydot import matplotlib.pyplot as plt From e38c8ad72f2c21e8ca923199f4be10b88794f997 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 13:54:49 +0300 Subject: [PATCH 046/167] enh(plot): mark optional "needs" --- graphkit/network.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 1f7d81e9..e06427c2 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -463,8 +463,9 @@ def plot_graph(graph, filename=None, show=False, steps=None, ARROWS - - **solid black arrows**: dependencies (target ``need`` source, - sources ``provide`` target) + - **solid black arrows**: dependencies (source-data are``need``\ed + by target-operations, sources-operations ``provide`` target-data) + - **dashed black arrows**: optional needs - **green-dotted arrows**: execution steps labeled in succession :param graph: @@ -547,7 +548,12 @@ def get_node_name(a): for src, dst in graph.edges(): src_name = get_node_name(src) dst_name = get_node_name(dst) - edge = pydot.Edge(src=src_name, dst=dst_name) + kw = {} + if isinstance(dst, Operation) and any(n == src + and isinstance(n, optional) + for n in dst.needs): + kw["style"] = "dashed" + edge = pydot.Edge(src=src_name, dst=dst_name, **kw) g.add_edge(edge) # draw steps sequence From d855bf688f607e6a4b33961b751b68d53dc5f2b7 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 13:55:10 +0300 Subject: [PATCH 047/167] ENH(plot): visual enhamcents on nodes & edges --- graphkit/network.py | 58 +++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 23 deletions(-) diff 
--git a/graphkit/network.py b/graphkit/network.py index e06427c2..1c246530 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -7,7 +7,8 @@ import networkx as nx -from .base import Operation +from .base import Operation, NetworkOperation +from .modifiers import optional class DataPlaceholderNode(str): @@ -380,7 +381,7 @@ def plot(self, filename=None, show=False, """ Plot a *Graphviz* graph and return it, if no other argument provided. - See :func:`network.plot_graph()` + See :func:`network.plot_graph()` for arguments, legend, and example code. """ return plot_graph(self.graph, filename, show, self.steps, inputs, outputs, solution) @@ -452,14 +453,18 @@ def plot_graph(graph, filename=None, show=False, steps=None, Legend: + NODES: - **circle**: function - - **house**: input (given) - - **inversed-house**: output (asked) + - **oval**: subgraph function + - **house**: given input + - **inversed-house**: asked output - **polygon**: given both as input & asked as output (what?) - - **square**: intermediate data (neither given nor asked) - - **red frame**: delete-instruction (to free up memory) + - **square**: intermediate data, neither given nor asked. + - **red frame**: delete-instruction, to free up memory. + - **filled**: data node has a value in `solution`, shown in tooltip. + - **thick frame**: function/data node visited. ARROWS @@ -473,6 +478,7 @@ def plot_graph(graph, filename=None, show=False, steps=None, :param str filename: Write the output to a file. The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` + Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. :param boolean show: If this is set to True, use matplotlib to show the graph diagram (Default: False) @@ -518,34 +524,39 @@ def get_node_name(a): # draw nodes for nx_node in graph.nodes: kw = {} - if isinstance(nx_node, DataPlaceholderNode): + if isinstance(nx_node, str): # Only DeleteInstructions data in steps. 
if nx_node in steps: - kw = {'color': 'red', 'style': 'bold'} - + kw = {'color': 'red', 'penwidth': 2} + # SHAPE change if in inputs/outputs. shape="rect" - if inputs and nx_node in inputs: - shape="invhouse" - if outputs and nx_node in outputs: + if inputs and outputs and nx_node in inputs and nx_node in outputs: + shape="hexagon" + else: if inputs and nx_node in inputs: - shape="polygon" - else: + shape="invhouse" + if outputs and nx_node in outputs: shape="house" # LABEL change from solution. - name = str(nx_node) if solution and nx_node in solution: - name = "%s: %s" % (nx_node, solution.get(nx_node)) - node = pydot.Node(name=nx_node, label=name, shape=shape, **kw) - else: + kw["style"] = "filled" + kw["fillcolor"] = "gray" + # kw["tooltip"] = nx_node, solution.get(nx_node) + node = pydot.Node(name=nx_node, shape=shape, + URL="fdgfdf", **kw) + else: # Operation + kw = {} + shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" if nx_node in steps: - kw = {'style': 'bold'} - node = pydot.Node(name=nx_node.name, shape="circle", **kw) + kw["style"] = "bold" + node = pydot.Node(name=nx_node.name, shape=shape, **kw) + g.add_node(node) # draw edges - for src, dst in graph.edges(): + for src, dst in graph.edges: src_name = get_node_name(src) dst_name = get_node_name(dst) kw = {} @@ -564,8 +575,9 @@ def get_node_name(a): src_name = get_node_name(src) dst_name = get_node_name(dst) edge = pydot.Edge( - src=src_name, dst=dst_name, label=str(i), style="dotted", - penwidth='2') + src=src_name, dst=dst_name, label=str(i), style='dotted', + color="green", fontcolor="green", fontname="bold", fontsize=18, + penwidth=3, arrowhead="vee") g.add_edge(edge) # save plot From ca5d243369bb8911a4fdd06c3eeb0b36d5045f57 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 14:01:41 +0300 Subject: [PATCH 048/167] test(plot): enhance plot test to try all #13 features; + test all chained plot() methods from netop. 
--- test/test_graphkit.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 826719d6..927632e3 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -108,21 +108,24 @@ def test_network_deep_merge(): def test_plotting(): - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) - net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) + pipeline = compose(name="netop")( + operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), + operation(name="sub", needs=["a", modifiers.optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), + operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), + ) + inputs = {'a': 1, 'b1': 2} + solution=pipeline(inputs) for ext in network.supported_plot_writers(): tdir = tempfile.mkdtemp(suffix=ext) png_file = osp.join(tdir, "workflow.png") - net1.net.plot(png_file) + pipeline.plot(png_file, inputs=inputs, solution=solution, outputs=['asked', 'b1']) try: assert osp.exists(png_file) finally: shutil.rmtree(tdir, ignore_errors=True) try: - net1.net.plot('bad.format') + pipeline.plot('bad.format') assert False, "Should had failed writting arbitrary file format!" except ValueError as ex: assert "Unknown file format" in str(ex) From f25f1897fbf583074a3cc34158235b76c29711dc Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 14:46:00 +0300 Subject: [PATCH 049/167] test(optional): +x1 TC to check selective optionals with same out... + enh TC with parallel. + Scavenged from #20. 
--- test/test_graphkit.py | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 0da8a80f..1c7e3fcc 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -445,6 +445,68 @@ def addplusplus(a, b, c=0): assert results['sum'] == sum(named_inputs.values()) +def test_optional_per_function_with_same_output(): + # Test that the same need can be both optional and not on different operations. + # + ## ATTENTION, the selected function is NOT the one with more inputs + # but the 1st satisfiable function added in the network. + + add_op = operation(name='add', needs=['a', 'b'], provides='a+b')(add) + sub_op_optional = operation( + name='sub_opt', needs=['a', modifiers.optional('b')], provides='a+b' + )(lambda a, b=10: a - b) + + # Normal order + # + pipeline = compose(name='partial_optionals')(add_op, sub_op_optional) + # + named_inputs = {'a': 1, 'b': 2} + assert pipeline(named_inputs) == {'a': 1, 'a+b': 3, 'b': 2} + assert pipeline(named_inputs, ['a+b']) == {'a+b': 3} + # + named_inputs = {'a': 1} + assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} + assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + + # Inverse op order + # + pipeline = compose(name='partial_optionals')(sub_op_optional, add_op) + # + named_inputs = {'a': 1, 'b': 2} + assert pipeline(named_inputs) == {'a': 1, 'a+b': -1, 'b': 2} + assert pipeline(named_inputs, ['a+b']) == {'a+b': -1} + # + named_inputs = {'a': 1} + assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} + assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + + # PARALLEL + Normal order + # + pipeline = compose(name='partial_optionals')(add_op, sub_op_optional) + pipeline.set_execution_method("parallel") + # + named_inputs = {'a': 1, 'b': 2} + assert pipeline(named_inputs) == {'a': 1, 'a+b': 3, 'b': 2} + assert pipeline(named_inputs, ['a+b']) == {'a+b': 3} + # + named_inputs = {'a': 1} + assert pipeline(named_inputs) == {'a': 1, 'a+b': 
-9} + assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + + # PARALLEL + Inverse op order + # + pipeline = compose(name='partial_optionals')(sub_op_optional, add_op) + pipeline.set_execution_method("parallel") + # + named_inputs = {'a': 1, 'b': 2} + assert pipeline(named_inputs) == {'a': 1, 'a+b': -1, 'b': 2} + assert pipeline(named_inputs, ['a+b']) == {'a+b': -1} + # + named_inputs = {'a': 1} + assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} + assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + + def test_deleted_optional(): # Test that DeleteInstructions included for optionals do not raise # exceptions when the corresponding input is not prodided. From a2de9efb2fbbb2fdb2a8e65c957db09297e0099f Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 16:32:26 +0300 Subject: [PATCH 050/167] doc(plot); explain also params in user-facing API --- graphkit/base.py | 23 ++++++++++++++++++++--- graphkit/network.py | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 512a95bc..1298b566 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -174,9 +174,26 @@ def set_execution_method(self, method): def plot(self, filename=None, show=False, inputs=None, outputs=None, solution=None): """ - Plot a *Graphviz* graph and return it, if no other argument provided. - - See :func:`network.plot_graph()` for arguments, legend, and example code. + :param str filename: + Write diagram into a file. + The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` + Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. + :param boolean show: + If it evaluates to true, opens the diagram in a matplotlib window. 
+ :param inputs: + an optional name list, any nodes in there are plotted + as a "house" + :param outputs: + an optional name list, any nodes in there are plotted + as an "inverted-house" + :param solution: + an optional dict with values to annotate nodes + (currently content not shown, but node drawn as "filled") + + :return: + An instance of the :mod`pydot` graph + + See :func:`network.plot_graph()` for the plot legend and example code. """ return self.net.plot(filename, show, inputs, outputs, solution) diff --git a/graphkit/network.py b/graphkit/network.py index 1c246530..ba9d3a76 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -381,7 +381,26 @@ def plot(self, filename=None, show=False, """ Plot a *Graphviz* graph and return it, if no other argument provided. - See :func:`network.plot_graph()` for arguments, legend, and example code. + :param str filename: + Write diagram into a file. + The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` + Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. + :param boolean show: + If it evaluates to true, opens the diagram in a matplotlib window. + :param inputs: + an optional name list, any nodes in there are plotted + as a "house" + :param outputs: + an optional name list, any nodes in there are plotted + as an "inverted-house" + :param solution: + an optional dict with values to annotate nodes + (currently content not shown, but node drawn as "filled") + + :return: + An instance of the :mod`pydot` graph + + See :func:`network.plot_graph` for the plot legend and example code. """ return plot_graph(self.graph, filename, show, self.steps, inputs, outputs, solution) @@ -476,24 +495,25 @@ def plot_graph(graph, filename=None, show=False, steps=None, :param graph: what to plot :param str filename: - Write the output to a file. + Write diagram into a file. The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. 
:param boolean show: - If this is set to True, use matplotlib to show the graph diagram - (Default: False) + If it evaluates to true, opens the diagram in a matplotlib window. :param steps: a list of nodes & instructions to overlay on the diagram :param inputs: - an optional list, any nodes in there are plotted as `"house" - `_ + an optional name list, any nodes in there are plotted + as a "house" :param outputs: - an optional list, any nodes in there are plotted as `"invhouse" - :param outputs: - an optional dict, any values in there are included in the node-name + an optional name list, any nodes in there are plotted + as an "inverted-house" + :param solution: + an optional dict with values to annotate nodes + (currently content not shown, but node drawn as "filled") - :returns: - An instance of the pydot graph + :return: + An instance of the :mod`pydot` graph **Example:** @@ -530,6 +550,7 @@ def get_node_name(a): kw = {'color': 'red', 'penwidth': 2} # SHAPE change if in inputs/outputs. + # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html shape="rect" if inputs and outputs and nx_node in inputs and nx_node in outputs: shape="hexagon" From dc5a21a6128a9710ec855b9d08465452caed9d89 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 18:29:36 +0300 Subject: [PATCH 051/167] FIX(PLOT.TC): TC was always testing PNG, ... + retorfitted to try all available formats. + list of forbidden formats based on my failres --- graphkit/base.py | 4 ++-- graphkit/network.py | 32 +++++++++++++------------------- test/test_graphkit.py | 34 ++++++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 1298b566..5f425028 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -176,8 +176,8 @@ def plot(self, filename=None, show=False, """ :param str filename: Write diagram into a file. 
- The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` - Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. + Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`network.supported_plot_formats()` for more. :param boolean show: If it evaluates to true, opens the diagram in a matplotlib window. :param inputs: diff --git a/graphkit/network.py b/graphkit/network.py index ba9d3a76..82b7d128 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -383,8 +383,8 @@ def plot(self, filename=None, show=False, :param str filename: Write diagram into a file. - The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` - Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. + Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`network.supported_plot_formats()` for more. :param boolean show: If it evaluates to true, opens the diagram in a matplotlib window. :param inputs: @@ -454,15 +454,10 @@ def get_data_node(name, graph): return None -def supported_plot_writers(): - return { - ".png": lambda gplot: gplot.create_png(), - ".dot": lambda gplot: gplot.to_string(), - ".jpg": lambda gplot: gplot.create_jpeg(), - ".jpeg": lambda gplot: gplot.create_jpeg(), - ".pdf": lambda gplot: gplot.create_pdf(), - ".svg": lambda gplot: gplot.create_svg(), - } +def supported_plot_formats(): + import pydot + + return [".%s" % f for f in pydot.Dot().formats] def plot_graph(graph, filename=None, show=False, steps=None, @@ -496,8 +491,8 @@ def plot_graph(graph, filename=None, show=False, steps=None, what to plot :param str filename: Write diagram into a file. - The extension must be one of: ``.png .dot .jpg .jpeg .pdf .svg`` - Prefer ``.pdf`` or ``.svg`` to see solution-values in tooltips. + Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`network.supported_plot_formats()` for more. :param boolean show: If it evaluates to true, opens the diagram in a matplotlib window. 
:param steps: @@ -603,16 +598,15 @@ def get_node_name(a): # save plot if filename: + formats = supported_plot_formats() _basename, ext = os.path.splitext(filename) - writers = supported_plot_writers() - plot_writer = supported_plot_writers().get(ext.lower()) - if not plot_writer: + if not ext.lower() in formats: raise ValueError( "Unknown file format for saving graph: %s" " File extensions must be one of: %s" - % (ext, ' '.join(writers))) - with open(filename, "wb") as fh: - fh.write(plot_writer(g)) + % (ext, " ".join(formats))) + + g.write(filename, format=ext.lower()[1:]) # display graph via matplotlib if show: diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 927632e3..fb7620b8 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -116,14 +116,27 @@ def test_plotting(): inputs = {'a': 1, 'b1': 2} solution=pipeline(inputs) - for ext in network.supported_plot_writers(): - tdir = tempfile.mkdtemp(suffix=ext) - png_file = osp.join(tdir, "workflow.png") - pipeline.plot(png_file, inputs=inputs, solution=solution, outputs=['asked', 'b1']) - try: - assert osp.exists(png_file) - finally: - shutil.rmtree(tdir, ignore_errors=True) + # ...not working on my PC ... + forbidden_formats = ".dia .hpgl .mif .pcl .pic .vtx .xlib".split() + tdir = tempfile.mkdtemp() + counter = 0 + try: + for ext in network.supported_plot_formats(): + if ext in forbidden_formats: + continue + + counter += 1 + fpath = osp.join(tdir, "workflow-%i%s" % (counter, ext)) + pipeline.plot(fpath, inputs=inputs, solution=solution, outputs=['asked', 'b1']) + assert osp.exists(fpath) + + counter += 1 + fpath = osp.join(tdir, "workflow-%i%s" % (counter, ext)) + pipeline.plot(fpath) + assert osp.exists(fpath) + finally: + shutil.rmtree(tdir, ignore_errors=True) + try: pipeline.plot('bad.format') assert False, "Should had failed writting arbitrary file format!" 
@@ -131,12 +144,13 @@ def test_plotting(): assert "Unknown file format" in str(ex) ## Check help msg lists all siupported formats - for ext in network.supported_plot_writers(): + for ext in network.supported_plot_formats(): assert ext in str(ex) def test_plotting_docstring(): - for ext in network.supported_plot_writers(): + common_formats = ".png .dot .jpg .jpeg .pdf .svg".split() + for ext in common_formats: assert ext in network.plot_graph.__doc__ From 782d9b9ee96bacd55732a657309358e7d2d2e8c5 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 18:31:27 +0300 Subject: [PATCH 052/167] fix(plot): don't require Matplotlib if no Window asked --- graphkit/network.py | 6 ++++-- test/test_graphkit.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 82b7d128..3351ee94 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -455,6 +455,7 @@ def get_data_node(name, graph): def supported_plot_formats(): + """return automatically all `pydot` extensions withlike ``.png``""" import pydot return [".%s" % f for f in pydot.Dot().formats] @@ -524,8 +525,6 @@ def plot_graph(graph, filename=None, show=False, steps=None, """ import pydot - import matplotlib.pyplot as plt - import matplotlib.image as mpimg assert graph is not None @@ -610,6 +609,9 @@ def get_node_name(a): # display graph via matplotlib if show: + import matplotlib.pyplot as plt + import matplotlib.image as mpimg + png = g.create_png() sio = io.BytesIO(png) img = mpimg.imread(sio) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index fb7620b8..d41572cc 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -117,7 +117,8 @@ def test_plotting(): solution=pipeline(inputs) # ...not working on my PC ... 
- forbidden_formats = ".dia .hpgl .mif .pcl .pic .vtx .xlib".split() + forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() + tdir = tempfile.mkdtemp() counter = 0 try: From 7d389c3f34b9c897aabd55d4dcdf7de8850fa349 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 18:50:11 +0300 Subject: [PATCH 053/167] test(plot): check also matplotlib show=True --- graphkit/network.py | 4 +++- setup.py | 6 +++++- test/test_graphkit.py | 12 ++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 3351ee94..140d0b2e 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -496,6 +496,7 @@ def plot_graph(graph, filename=None, show=False, steps=None, call :func:`network.supported_plot_formats()` for more. :param boolean show: If it evaluates to true, opens the diagram in a matplotlib window. + If it equals ``-1``, it plots but does not open the Window. :param steps: a list of nodes & instructions to overlay on the diagram :param inputs: @@ -616,6 +617,7 @@ def get_node_name(a): sio = io.BytesIO(png) img = mpimg.imread(sio) plt.imshow(img, aspect="equal") - plt.show() + if show != -1: + plt.show() return g diff --git a/setup.py b/setup.py index d3dfec84..4ed30ff8 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,11 @@ extras_require={ 'plot': ['pydot', 'matplotlib'] }, - tests_require=['numpy'], + tests_require=[ + "numpy", + "pydot", # to test plot + "matplotlib" # to test plot + ], license='Apache-2.0', keywords=['graph', 'computation graph', 'DAG', 'directed acyclical graph'], classifiers=[ diff --git a/test/test_graphkit.py b/test/test_graphkit.py index d41572cc..c4d8a20f 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -5,6 +5,7 @@ import pickle import os.path as osp import shutil +import sys import tempfile @@ -138,6 +139,17 @@ def test_plotting(): finally: shutil.rmtree(tdir, ignore_errors=True) + ## Don't open matplotlib window. 
+ # + if sys.version_info < (3, 5): + # On PY< 3.5 it fails with: + # nose.proxy.TclError: no display name and no $DISPLAY environment variable + # eg https://travis-ci.org/ankostis/graphkit/jobs/593957996 + import matplotlib + matplotlib.use("Agg") + # do not open window in headless travis + assert pipeline.plot(show=-1) + try: pipeline.plot('bad.format') assert False, "Should had failed writting arbitrary file format!" From 3fe0b404594d347b7f481197be89b17f1a5ae8c1 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 19:19:05 +0300 Subject: [PATCH 054/167] ENH(plot): return SVG rendered in JUPYTER, ... + doc: rename in sample code: netop --> pipeline. + enh(build): add `ipython` in test dependencies. + include it in the plot TC. --- graphkit/base.py | 10 +++++++--- graphkit/network.py | 44 ++++++++++++++++++++++++++++++------------- setup.py | 1 + test/test_graphkit.py | 9 ++++++++- 4 files changed, 47 insertions(+), 17 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 5f425028..36ccca3c 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -171,15 +171,19 @@ def set_execution_method(self, method): assert method in options self._execution_method = method - def plot(self, filename=None, show=False, + def plot(self, filename=None, show=False, jupyter=None, inputs=None, outputs=None, solution=None): """ :param str filename: Write diagram into a file. Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` call :func:`network.supported_plot_formats()` for more. - :param boolean show: + :param show: If it evaluates to true, opens the diagram in a matplotlib window. + If it equals `-1`, it plots but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). 
:param inputs: an optional name list, any nodes in there are plotted as a "house" @@ -195,7 +199,7 @@ def plot(self, filename=None, show=False, See :func:`network.plot_graph()` for the plot legend and example code. """ - return self.net.plot(filename, show, inputs, outputs, solution) + return self.net.plot(filename, show, jupyter, inputs, outputs, solution) def __getstate__(self): state = Operation.__getstate__(self) diff --git a/graphkit/network.py b/graphkit/network.py index 140d0b2e..6dc4d48a 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -376,7 +376,7 @@ def _compute_sequential_method(self, named_inputs, outputs): return {k: cache[k] for k in iter(cache) if k in outputs} - def plot(self, filename=None, show=False, + def plot(self, filename=None, show=False, jupyter=None, inputs=None, outputs=None, solution=None): """ Plot a *Graphviz* graph and return it, if no other argument provided. @@ -385,8 +385,12 @@ def plot(self, filename=None, show=False, Write diagram into a file. Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` call :func:`network.supported_plot_formats()` for more. - :param boolean show: + :param show: If it evaluates to true, opens the diagram in a matplotlib window. + If it equals `-1``, it plots but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). :param inputs: an optional name list, any nodes in there are plotted as a "house" @@ -402,8 +406,8 @@ def plot(self, filename=None, show=False, See :func:`network.plot_graph` for the plot legend and example code. 
""" - return plot_graph(self.graph, filename, show, self.steps, - inputs, outputs, solution) + return plot_graph(self.graph, filename, show, jupyter, + self.steps, inputs, outputs, solution) def ready_to_schedule_operation(op, has_executed, graph): @@ -461,8 +465,8 @@ def supported_plot_formats(): return [".%s" % f for f in pydot.Dot().formats] -def plot_graph(graph, filename=None, show=False, steps=None, - inputs=None, outputs=None, solution=None): +def plot_graph(graph, filename=None, show=False, jupyter=False, + steps=None, inputs=None, outputs=None, solution=None): """ Plot a *Graphviz* graph/steps and return it, if no other argument provided. @@ -494,9 +498,12 @@ def plot_graph(graph, filename=None, show=False, steps=None, Write diagram into a file. Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` call :func:`network.supported_plot_formats()` for more. - :param boolean show: + :param show: If it evaluates to true, opens the diagram in a matplotlib window. - If it equals ``-1``, it plots but does not open the Window. + If it equals `-1``, it plots but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). :param steps: a list of nodes & instructions to overlay on the diagram :param inputs: @@ -514,15 +521,18 @@ def plot_graph(graph, filename=None, show=False, steps=None, **Example:** - >>> netop = compose(name="netop")( + >>> from graphkit import compose, operation + >>> from graphkit.modifiers import optional + + >>> pipeline = compose(name="pipeline")( ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), ... operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), ... 
) >>> inputs = {'a': 1, 'b1': 2} - >>> solution=netop(inputs) - >>> netop.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); + >>> solution=pipeline(inputs) + >>> pipeline.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); """ import pydot @@ -596,7 +606,8 @@ def get_node_name(a): penwidth=3, arrowhead="vee") g.add_edge(edge) - # save plot + # Save plot + # if filename: formats = supported_plot_formats() _basename, ext = os.path.splitext(filename) @@ -608,7 +619,14 @@ def get_node_name(a): g.write(filename, format=ext.lower()[1:]) - # display graph via matplotlib + ## Return an SVG renderable in jupyter. + # + if jupyter: + from IPython.display import SVG + g = SVG(data=g.create_svg()) + + ## Display graph via matplotlib + # if show: import matplotlib.pyplot as plt import matplotlib.image as mpimg diff --git a/setup.py b/setup.py index 4ed30ff8..1448a241 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,7 @@ }, tests_require=[ "numpy", + "ipython; python_version >= '3.5'", # to test jupyter plot. "pydot", # to test plot "matplotlib" # to test plot ], diff --git a/test/test_graphkit.py b/test/test_graphkit.py index c4d8a20f..4cd283f2 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -139,7 +139,8 @@ def test_plotting(): finally: shutil.rmtree(tdir, ignore_errors=True) - ## Don't open matplotlib window. + ## Try matplotlib Window, but + # without opening a Window. # if sys.version_info < (3, 5): # On PY< 3.5 it fails with: @@ -150,6 +151,12 @@ def test_plotting(): # do not open window in headless travis assert pipeline.plot(show=-1) + ## Try Jupyter SVG. + # + # but latest ipython-7+ dropped < PY3.4 + if sys.version_info >= (3, 5): + assert "display.SVG" in str(type(pipeline.plot(jupyter=True))) + try: pipeline.plot('bad.format') assert False, "Should had failed writting arbitrary file format!" 
From 1471551b30ce22ab668b4d18bf8d20ceaea5061f Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 19:40:47 +0300 Subject: [PATCH 055/167] refact(plot.TC): avoid writting multiple temp-files --- test/test_graphkit.py | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 4cd283f2..07c2654d 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -117,25 +117,35 @@ def test_plotting(): inputs = {'a': 1, 'b1': 2} solution=pipeline(inputs) - # ...not working on my PC ... - forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() + ## Generate all formats + # (not needing to save files) + # + # ...these are not working on my PC, or travis. + forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() + prev_dot = None + for ext in network.supported_plot_formats(): + if ext in forbidden_formats: + continue + + dot = pipeline.plot(inputs=inputs, solution=solution, outputs=['asked', 'b1']) + assert dot + assert dot != prev_dot + prev_dot = dot + + dot = pipeline.plot() + assert dot + assert dot != prev_dot + prev_dot = dot + + ## Try saving one file. 
+ # tdir = tempfile.mkdtemp() - counter = 0 + fpath = osp.join(tdir, "workflow.png") try: - for ext in network.supported_plot_formats(): - if ext in forbidden_formats: - continue - - counter += 1 - fpath = osp.join(tdir, "workflow-%i%s" % (counter, ext)) - pipeline.plot(fpath, inputs=inputs, solution=solution, outputs=['asked', 'b1']) - assert osp.exists(fpath) - - counter += 1 - fpath = osp.join(tdir, "workflow-%i%s" % (counter, ext)) - pipeline.plot(fpath) - assert osp.exists(fpath) + dot = pipeline.plot(fpath, inputs=inputs, solution=solution, outputs=['asked', 'b1']) + assert osp.exists(fpath) + assert dot finally: shutil.rmtree(tdir, ignore_errors=True) From b4401963eb103cd7b6fa225fb0dad0153efa89a2 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 20:06:48 +0300 Subject: [PATCH 056/167] fix(build): reuse dependencies definitions --- setup.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 6ce52009..654f4e13 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,16 @@ with io.open('graphkit/__init__.py', 'rt', encoding='utf8') as f: version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) +plot_reqs = [ + "ipython; python_version >= '3.5'", # to test jupyter plot. + "matplotlib", # to test plot + "pydot", # to test plot +] +test_reqs = plot_reqs + [ + "pytest", + "pytest-cov", +] + setup( name='graphkit', version=version, @@ -33,16 +43,10 @@ "networkx == 2.2; python_version < '3.5'", ], extras_require={ - 'plot': ['pydot', 'matplotlib'], - 'test': ['pydot', 'matplotlib', 'pytest', "pytest-cov"], + 'plot': plot_reqs, + 'test': test_reqs, }, - tests_require=[ - "pytest", - "pytest-cov", - "ipython; python_version >= '3.5'", # to test jupyter plot. 
- "pydot", # to test plot - "matplotlib" # to test plot - ], + tests_require=test_reqs, license='Apache-2.0', keywords=['graph', 'computation graph', 'DAG', 'directed acyclical graph'], classifiers=[ From bde9b64a0000d5b0ca798d39d34c9043d2bc80fd Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 21:17:10 +0300 Subject: [PATCH 057/167] REFACT(plot.TC): PYTESTize and parametrize --- test/test_graphkit.py | 81 -------------------------------- test/test_plot.py | 106 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 81 deletions(-) create mode 100644 test/test_plot.py diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 1afdb740..7db2e973 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -3,11 +3,6 @@ import math import pickle -import os.path as osp -import shutil -import sys -import tempfile - from pprint import pprint from operator import add @@ -109,82 +104,6 @@ def test_network_deep_merge(): pprint(net3({'a': 1, 'b': 2, 'c': 4})) -def test_plotting(): - pipeline = compose(name="netop")( - operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), - operation(name="sub", needs=["a", modifiers.optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), - operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), - ) - inputs = {'a': 1, 'b1': 2} - solution=pipeline(inputs) - - - ## Generate all formats - # (not needing to save files) - # - # ...these are not working on my PC, or travis. - forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() - prev_dot = None - for ext in network.supported_plot_formats(): - if ext in forbidden_formats: - continue - - dot = pipeline.plot(inputs=inputs, solution=solution, outputs=['asked', 'b1']) - assert dot - assert dot != prev_dot - prev_dot = dot - - dot = pipeline.plot() - assert dot - assert dot != prev_dot - prev_dot = dot - - ## Try saving one file. 
- # - tdir = tempfile.mkdtemp() - fpath = osp.join(tdir, "workflow.png") - try: - dot = pipeline.plot(fpath, inputs=inputs, solution=solution, outputs=['asked', 'b1']) - assert osp.exists(fpath) - assert dot - finally: - shutil.rmtree(tdir, ignore_errors=True) - - ## Try matplotlib Window, but - # without opening a Window. - # - if sys.version_info < (3, 5): - # On PY< 3.5 it fails with: - # nose.proxy.TclError: no display name and no $DISPLAY environment variable - # eg https://travis-ci.org/ankostis/graphkit/jobs/593957996 - import matplotlib - matplotlib.use("Agg") - # do not open window in headless travis - assert pipeline.plot(show=-1) - - ## Try Jupyter SVG. - # - # but latest ipython-7+ dropped < PY3.4 - if sys.version_info >= (3, 5): - assert "display.SVG" in str(type(pipeline.plot(jupyter=True))) - - try: - pipeline.plot('bad.format') - assert False, "Should had failed writting arbitrary file format!" - except ValueError as ex: - assert "Unknown file format" in str(ex) - - ## Check help msg lists all siupported formats - for ext in network.supported_plot_formats(): - assert ext in str(ex) - - -def test_plotting_docstring(): - common_formats = ".png .dot .jpg .jpeg .pdf .svg".split() - for ext in common_formats: - assert ext in network.plot_graph.__doc__ - - def test_input_based_pruning(): # Tests to make sure we don't need to pass graph inputs if we're provided # with data further downstream in the graph as an input. diff --git a/test/test_plot.py b/test/test_plot.py new file mode 100644 index 00000000..90ba87f9 --- /dev/null +++ b/test/test_plot.py @@ -0,0 +1,106 @@ +# Copyright 2016, Yahoo Inc. +# Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
+ +from operator import add + +import pytest +import sys + +from graphkit import compose, network, operation +from graphkit.modifiers import optional + + +@pytest.fixture +def pipeline(): + return compose(name="netop")( + operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), + operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])( + lambda a, b=1: a - b + ), + operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), + ) + + +@pytest.fixture(params=[{"a": 1}, {"a": 1, "b1": 2}]) +def inputs(request): + return {"a": 1, "b1": 2} + + +@pytest.fixture(params=[None, ("a", "b1")]) +def input_names(request): + return request.param + + +@pytest.fixture(params=[None, ["asked", "b1"]]) +def outputs(request): + return request.param + + +@pytest.fixture(params=[None, 1]) +def solution(pipeline, inputs, outputs, request): + return request.param and pipeline(inputs, outputs) + + +###### TEST CASES ####### +## + + +def test_plotting_docstring(): + common_formats = ".png .dot .jpg .jpeg .pdf .svg".split() + for ext in common_formats: + assert ext in network.plot_graph.__doc__ + + +def test_plot_formats(pipeline, input_names, outputs, solution, tmp_path): + ## Generate all formats (not needing to save files) + + # ...these are not working on my PC, or travis. 
+ forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() + prev_dot = None + for ext in network.supported_plot_formats(): + if ext not in forbidden_formats: + dot = pipeline.plot(inputs=input_names, outputs=outputs, solution=solution) + assert dot + assert ext == ".jpg" or dot != prev_dot + prev_dot = dot + + +def test_plot_bad_format(pipeline, tmp_path): + with pytest.raises(ValueError, match="Unknown file format") as exinfo: + pipeline.plot(filename="bad.format") + + ## Check help msg lists all siupported formats + for ext in network.supported_plot_formats(): + assert exinfo.match(ext) + + +def test_plot_write_file(pipeline, tmp_path): + # Try saving a file from one format. + + fpath = tmp_path / "workflow.png" + + dot = pipeline.plot(str(fpath)) + assert fpath.exists() + assert dot + + +def test_plot_matplib(pipeline, tmp_path): + ## Try matplotlib Window, but # without opening a Window. + + if sys.version_info < (3, 5): + # On PY< 3.5 it fails with: + # nose.proxy.TclError: no display name and no $DISPLAY environment variable + # eg https://travis-ci.org/ankostis/graphkit/jobs/593957996 + import matplotlib + + matplotlib.use("Agg") + # do not open window in headless travis + assert pipeline.plot(show=-1) + + +@pytest.mark.skipif(sys.version_info < (3, 5), reason="ipython-7+ dropped PY3.4-") +def test_plot_jupyter(pipeline, tmp_path): + ## Try returned Jupyter SVG. 
+ + dot = pipeline.plot(jupyter=True) + assert "display.SVG" in str(type(dot)) From 8e361e6b3e463afe48eb94ce0feac59a37421c5e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 21:34:00 +0300 Subject: [PATCH 058/167] REFACT(PLOT): MOVE PLOT in own module --- graphkit/base.py | 2 +- graphkit/network.py | 186 ++------------------------------------- graphkit/plot.py | 208 ++++++++++++++++++++++++++++++++++++++++++++ test/test_plot.py | 8 +- 4 files changed, 219 insertions(+), 185 deletions(-) create mode 100644 graphkit/plot.py diff --git a/graphkit/base.py b/graphkit/base.py index 36ccca3c..140b7a97 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -197,7 +197,7 @@ def plot(self, filename=None, show=False, jupyter=None, :return: An instance of the :mod`pydot` graph - See :func:`network.plot_graph()` for the plot legend and example code. + See :func:`graphkit.plot.plot_graph()` for the plot legend and example code. """ return self.net.plot(filename, show, jupyter, inputs, outputs, solution) diff --git a/graphkit/network.py b/graphkit/network.py index 6dc4d48a..fa94f822 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -1,13 +1,11 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. -import io -import os import time import networkx as nx -from .base import Operation, NetworkOperation +from .base import Operation from .modifiers import optional @@ -404,9 +402,11 @@ def plot(self, filename=None, show=False, jupyter=None, :return: An instance of the :mod`pydot` graph - See :func:`network.plot_graph` for the plot legend and example code. + See :func:`graphkit.plot.plot_graph()` for the plot legend and example code. """ - return plot_graph(self.graph, filename, show, jupyter, + from . 
import plot + + return plot.plot_graph(self.graph, filename, show, jupyter, self.steps, inputs, outputs, solution) @@ -463,179 +463,3 @@ def supported_plot_formats(): import pydot return [".%s" % f for f in pydot.Dot().formats] - - -def plot_graph(graph, filename=None, show=False, jupyter=False, - steps=None, inputs=None, outputs=None, solution=None): - """ - Plot a *Graphviz* graph/steps and return it, if no other argument provided. - - Legend: - - - NODES: - - - **circle**: function - - **oval**: subgraph function - - **house**: given input - - **inversed-house**: asked output - - **polygon**: given both as input & asked as output (what?) - - **square**: intermediate data, neither given nor asked. - - **red frame**: delete-instruction, to free up memory. - - **filled**: data node has a value in `solution`, shown in tooltip. - - **thick frame**: function/data node visited. - - ARROWS - - - **solid black arrows**: dependencies (source-data are``need``\ed - by target-operations, sources-operations ``provide`` target-data) - - **dashed black arrows**: optional needs - - **green-dotted arrows**: execution steps labeled in succession - - :param graph: - what to plot - :param str filename: - Write diagram into a file. - Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`network.supported_plot_formats()` for more. - :param show: - If it evaluates to true, opens the diagram in a matplotlib window. - If it equals `-1``, it plots but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). 
- :param steps: - a list of nodes & instructions to overlay on the diagram - :param inputs: - an optional name list, any nodes in there are plotted - as a "house" - :param outputs: - an optional name list, any nodes in there are plotted - as an "inverted-house" - :param solution: - an optional dict with values to annotate nodes - (currently content not shown, but node drawn as "filled") - - :return: - An instance of the :mod`pydot` graph - - **Example:** - - >>> from graphkit import compose, operation - >>> from graphkit.modifiers import optional - - >>> pipeline = compose(name="pipeline")( - ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), - ... operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), - ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), - ... ) - - >>> inputs = {'a': 1, 'b1': 2} - >>> solution=pipeline(inputs) - >>> pipeline.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); - - """ - import pydot - - assert graph is not None - - def get_node_name(a): - if isinstance(a, Operation): - return a.name - return a - - g = pydot.Dot(graph_type="digraph") - - # draw nodes - for nx_node in graph.nodes: - kw = {} - if isinstance(nx_node, str): - # Only DeleteInstructions data in steps. - if nx_node in steps: - kw = {'color': 'red', 'penwidth': 2} - - # SHAPE change if in inputs/outputs. - # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html - shape="rect" - if inputs and outputs and nx_node in inputs and nx_node in outputs: - shape="hexagon" - else: - if inputs and nx_node in inputs: - shape="invhouse" - if outputs and nx_node in outputs: - shape="house" - - # LABEL change from solution. 
- if solution and nx_node in solution: - kw["style"] = "filled" - kw["fillcolor"] = "gray" - # kw["tooltip"] = nx_node, solution.get(nx_node) - node = pydot.Node(name=nx_node, shape=shape, - URL="fdgfdf", **kw) - else: # Operation - kw = {} - shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" - if nx_node in steps: - kw["style"] = "bold" - node = pydot.Node(name=nx_node.name, shape=shape, **kw) - - g.add_node(node) - - # draw edges - for src, dst in graph.edges: - src_name = get_node_name(src) - dst_name = get_node_name(dst) - kw = {} - if isinstance(dst, Operation) and any(n == src - and isinstance(n, optional) - for n in dst.needs): - kw["style"] = "dashed" - edge = pydot.Edge(src=src_name, dst=dst_name, **kw) - g.add_edge(edge) - - # draw steps sequence - if steps and len(steps) > 1: - it1 = iter(steps) - it2 = iter(steps); next(it2) - for i, (src, dst) in enumerate(zip(it1, it2), 1): - src_name = get_node_name(src) - dst_name = get_node_name(dst) - edge = pydot.Edge( - src=src_name, dst=dst_name, label=str(i), style='dotted', - color="green", fontcolor="green", fontname="bold", fontsize=18, - penwidth=3, arrowhead="vee") - g.add_edge(edge) - - # Save plot - # - if filename: - formats = supported_plot_formats() - _basename, ext = os.path.splitext(filename) - if not ext.lower() in formats: - raise ValueError( - "Unknown file format for saving graph: %s" - " File extensions must be one of: %s" - % (ext, " ".join(formats))) - - g.write(filename, format=ext.lower()[1:]) - - ## Return an SVG renderable in jupyter. 
- # - if jupyter: - from IPython.display import SVG - g = SVG(data=g.create_svg()) - - ## Display graph via matplotlib - # - if show: - import matplotlib.pyplot as plt - import matplotlib.image as mpimg - - png = g.create_png() - sio = io.BytesIO(png) - img = mpimg.imread(sio) - plt.imshow(img, aspect="equal") - if show != -1: - plt.show() - - return g diff --git a/graphkit/plot.py b/graphkit/plot.py new file mode 100644 index 00000000..65d7d4a2 --- /dev/null +++ b/graphkit/plot.py @@ -0,0 +1,208 @@ +# Copyright 2016, Yahoo Inc. +# Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. + +import io +import os + +from .base import NetworkOperation, Operation +from .modifiers import optional + + +def supported_plot_formats(): + """return automatically all `pydot` extensions withlike ``.png``""" + import pydot + + return [".%s" % f for f in pydot.Dot().formats] + + +def plot_graph( + graph, + filename=None, + show=False, + jupyter=False, + steps=None, + inputs=None, + outputs=None, + solution=None, +): + """ + Plot a *Graphviz* graph/steps and return it, if no other argument provided. + + Legend: + + + NODES: + + - **circle**: function + - **oval**: subgraph function + - **house**: given input + - **inversed-house**: asked output + - **polygon**: given both as input & asked as output (what?) + - **square**: intermediate data, neither given nor asked. + - **red frame**: delete-instruction, to free up memory. + - **filled**: data node has a value in `solution`, shown in tooltip. + - **thick frame**: function/data node visited. + + ARROWS + + - **solid black arrows**: dependencies (source-data are``need``\ed + by target-operations, sources-operations ``provide`` target-data) + - **dashed black arrows**: optional needs + - **green-dotted arrows**: execution steps labeled in succession + + :param graph: + what to plot + :param str filename: + Write diagram into a file. 
+ Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`network.supported_plot_formats()` for more. + :param show: + If it evaluates to true, opens the diagram in a matplotlib window. + If it equals `-1``, it plots but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). + :param steps: + a list of nodes & instructions to overlay on the diagram + :param inputs: + an optional name list, any nodes in there are plotted + as a "house" + :param outputs: + an optional name list, any nodes in there are plotted + as an "inverted-house" + :param solution: + an optional dict with values to annotate nodes + (currently content not shown, but node drawn as "filled") + + :return: + An instance of the :mod`pydot` graph + + **Example:** + + >>> from graphkit import compose, operation + >>> from graphkit.modifiers import optional + + >>> pipeline = compose(name="pipeline")( + ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), + ... operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), + ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), + ... ) + + >>> inputs = {'a': 1, 'b1': 2} + >>> solution=pipeline(inputs) + >>> pipeline.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); + + """ + import pydot + + assert graph is not None + + def get_node_name(a): + if isinstance(a, Operation): + return a.name + return a + + g = pydot.Dot(graph_type="digraph") + + # draw nodes + for nx_node in graph.nodes: + kw = {} + if isinstance(nx_node, str): + # Only DeleteInstructions data in steps. + if nx_node in steps: + kw = {"color": "red", "penwidth": 2} + + # SHAPE change if in inputs/outputs. 
+ # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html + shape = "rect" + if inputs and outputs and nx_node in inputs and nx_node in outputs: + shape = "hexagon" + else: + if inputs and nx_node in inputs: + shape = "invhouse" + if outputs and nx_node in outputs: + shape = "house" + + # LABEL change from solution. + if solution and nx_node in solution: + kw["style"] = "filled" + kw["fillcolor"] = "gray" + # kw["tooltip"] = nx_node, solution.get(nx_node) + node = pydot.Node(name=nx_node, shape=shape, URL="fdgfdf", **kw) + else: # Operation + kw = {} + shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" + if nx_node in steps: + kw["style"] = "bold" + node = pydot.Node(name=nx_node.name, shape=shape, **kw) + + g.add_node(node) + + # draw edges + for src, dst in graph.edges: + src_name = get_node_name(src) + dst_name = get_node_name(dst) + kw = {} + if isinstance(dst, Operation) and any( + n == src and isinstance(n, optional) for n in dst.needs + ): + kw["style"] = "dashed" + edge = pydot.Edge(src=src_name, dst=dst_name, **kw) + g.add_edge(edge) + + # draw steps sequence + if steps and len(steps) > 1: + it1 = iter(steps) + it2 = iter(steps) + next(it2) + for i, (src, dst) in enumerate(zip(it1, it2), 1): + src_name = get_node_name(src) + dst_name = get_node_name(dst) + edge = pydot.Edge( + src=src_name, + dst=dst_name, + label=str(i), + style="dotted", + color="green", + fontcolor="green", + fontname="bold", + fontsize=18, + penwidth=3, + arrowhead="vee", + ) + g.add_edge(edge) + + # Save plot + # + if filename: + formats = supported_plot_formats() + _basename, ext = os.path.splitext(filename) + if not ext.lower() in formats: + raise ValueError( + "Unknown file format for saving graph: %s" + " File extensions must be one of: %s" % (ext, " ".join(formats)) + ) + + g.write(filename, format=ext.lower()[1:]) + + ## Return an SVG renderable in jupyter. 
+ # + if jupyter: + from IPython.display import SVG + + g = SVG(data=g.create_svg()) + + ## Display graph via matplotlib + # + if show: + import matplotlib.pyplot as plt + import matplotlib.image as mpimg + + png = g.create_png() + sio = io.BytesIO(png) + img = mpimg.imread(sio) + plt.imshow(img, aspect="equal") + if show != -1: + plt.show() + + return g diff --git a/test/test_plot.py b/test/test_plot.py index 90ba87f9..1ba2cc0b 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -1,12 +1,12 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. +import sys from operator import add import pytest -import sys -from graphkit import compose, network, operation +from graphkit import base, compose, network, operation, plot from graphkit.modifiers import optional @@ -48,7 +48,9 @@ def solution(pipeline, inputs, outputs, request): def test_plotting_docstring(): common_formats = ".png .dot .jpg .jpeg .pdf .svg".split() for ext in common_formats: - assert ext in network.plot_graph.__doc__ + assert ext in plot.plot_graph.__doc__ + assert ext in base.NetworkOperation.plot.__doc__ + assert ext in network.Network.plot.__doc__ def test_plot_formats(pipeline, input_names, outputs, solution, tmp_path): From b08a3631cfd600e6b9efa0df389cacba1f72c2c7 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 21:38:04 +0300 Subject: [PATCH 059/167] DROP PY3.4 - add PY3.6, PY3.7... ...pytest has problems with 3.4. 
--- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4af0a1c1..cbd8cf82 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,9 @@ language: python python: - "2.7" - - "3.4" - "3.5" + - "3.6" + - "3.7" addons: apt: From 3a879592e9d2650db4fffc198222f42f0b3da258 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 5 Oct 2019 21:46:16 +0300 Subject: [PATCH 060/167] refact(plot): separate graphviz building from IO --- graphkit/base.py | 2 +- graphkit/network.py | 2 +- graphkit/plot.py | 173 +++++++++++++++++++++++--------------------- test/test_plot.py | 4 +- 4 files changed, 94 insertions(+), 87 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 140b7a97..212de939 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -177,7 +177,7 @@ def plot(self, filename=None, show=False, jupyter=None, :param str filename: Write diagram into a file. Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`network.supported_plot_formats()` for more. + call :func:`plot.supported_plot_formats()` for more. :param show: If it evaluates to true, opens the diagram in a matplotlib window. If it equals `-1`, it plots but does not open the Window. diff --git a/graphkit/network.py b/graphkit/network.py index fa94f822..582d0c0a 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -382,7 +382,7 @@ def plot(self, filename=None, show=False, jupyter=None, :param str filename: Write diagram into a file. Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`network.supported_plot_formats()` for more. + call :func:`plot.supported_plot_formats()` for more. :param show: If it evaluates to true, opens the diagram in a matplotlib window. If it equals `-1``, it plots but does not open the Window. 
diff --git a/graphkit/plot.py b/graphkit/plot.py index 65d7d4a2..bed110d8 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -15,6 +15,90 @@ def supported_plot_formats(): return [".%s" % f for f in pydot.Dot().formats] +def build_pydot(graph, steps=None, inputs=None, outputs=None, solution=None): + """ Build a Graphviz graph """ + import pydot + + assert graph is not None + + def get_node_name(a): + if isinstance(a, Operation): + return a.name + return a + + dot = pydot.Dot(graph_type="digraph") + + # draw nodes + for nx_node in graph.nodes: + kw = {} + if isinstance(nx_node, str): + # Only DeleteInstructions data in steps. + if nx_node in steps: + kw = {"color": "red", "penwidth": 2} + + # SHAPE change if in inputs/outputs. + # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html + shape = "rect" + if inputs and outputs and nx_node in inputs and nx_node in outputs: + shape = "hexagon" + else: + if inputs and nx_node in inputs: + shape = "invhouse" + if outputs and nx_node in outputs: + shape = "house" + + # LABEL change from solution. 
+ if solution and nx_node in solution: + kw["style"] = "filled" + kw["fillcolor"] = "gray" + # kw["tooltip"] = nx_node, solution.get(nx_node) + node = pydot.Node(name=nx_node, shape=shape, URL="fdgfdf", **kw) + else: # Operation + kw = {} + shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" + if nx_node in steps: + kw["style"] = "bold" + node = pydot.Node(name=nx_node.name, shape=shape, **kw) + + dot.add_node(node) + + # draw edges + for src, dst in graph.edges: + src_name = get_node_name(src) + dst_name = get_node_name(dst) + kw = {} + if isinstance(dst, Operation) and any( + n == src and isinstance(n, optional) for n in dst.needs + ): + kw["style"] = "dashed" + edge = pydot.Edge(src=src_name, dst=dst_name, **kw) + dot.add_edge(edge) + + # draw steps sequence + if steps and len(steps) > 1: + it1 = iter(steps) + it2 = iter(steps) + next(it2) + for i, (src, dst) in enumerate(zip(it1, it2), 1): + src_name = get_node_name(src) + dst_name = get_node_name(dst) + edge = pydot.Edge( + src=src_name, + dst=dst_name, + label=str(i), + style="dotted", + color="green", + fontcolor="green", + fontname="bold", + fontsize=18, + penwidth=3, + arrowhead="vee", + ) + dot.add_edge(edge) + + return dot + + def plot_graph( graph, filename=None, @@ -55,7 +139,7 @@ def plot_graph( :param str filename: Write diagram into a file. Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`network.supported_plot_formats()` for more. + call :func:`plot.supported_plot_formats()` for more. :param show: If it evaluates to true, opens the diagram in a matplotlib window. If it equals `-1``, it plots but does not open the Window. 
@@ -93,84 +177,7 @@ def plot_graph( >>> pipeline.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); """ - import pydot - - assert graph is not None - - def get_node_name(a): - if isinstance(a, Operation): - return a.name - return a - - g = pydot.Dot(graph_type="digraph") - - # draw nodes - for nx_node in graph.nodes: - kw = {} - if isinstance(nx_node, str): - # Only DeleteInstructions data in steps. - if nx_node in steps: - kw = {"color": "red", "penwidth": 2} - - # SHAPE change if in inputs/outputs. - # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html - shape = "rect" - if inputs and outputs and nx_node in inputs and nx_node in outputs: - shape = "hexagon" - else: - if inputs and nx_node in inputs: - shape = "invhouse" - if outputs and nx_node in outputs: - shape = "house" - - # LABEL change from solution. - if solution and nx_node in solution: - kw["style"] = "filled" - kw["fillcolor"] = "gray" - # kw["tooltip"] = nx_node, solution.get(nx_node) - node = pydot.Node(name=nx_node, shape=shape, URL="fdgfdf", **kw) - else: # Operation - kw = {} - shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" - if nx_node in steps: - kw["style"] = "bold" - node = pydot.Node(name=nx_node.name, shape=shape, **kw) - - g.add_node(node) - - # draw edges - for src, dst in graph.edges: - src_name = get_node_name(src) - dst_name = get_node_name(dst) - kw = {} - if isinstance(dst, Operation) and any( - n == src and isinstance(n, optional) for n in dst.needs - ): - kw["style"] = "dashed" - edge = pydot.Edge(src=src_name, dst=dst_name, **kw) - g.add_edge(edge) - - # draw steps sequence - if steps and len(steps) > 1: - it1 = iter(steps) - it2 = iter(steps) - next(it2) - for i, (src, dst) in enumerate(zip(it1, it2), 1): - src_name = get_node_name(src) - dst_name = get_node_name(dst) - edge = pydot.Edge( - src=src_name, - dst=dst_name, - label=str(i), - style="dotted", - color="green", - fontcolor="green", - fontname="bold", - fontsize=18, 
- penwidth=3, - arrowhead="vee", - ) - g.add_edge(edge) + dot = build_pydot(graph, steps, inputs, outputs, solution) # Save plot # @@ -183,14 +190,14 @@ def get_node_name(a): " File extensions must be one of: %s" % (ext, " ".join(formats)) ) - g.write(filename, format=ext.lower()[1:]) + dot.write(filename, format=ext.lower()[1:]) ## Return an SVG renderable in jupyter. # if jupyter: from IPython.display import SVG - g = SVG(data=g.create_svg()) + dot = SVG(data=dot.create_svg()) ## Display graph via matplotlib # @@ -198,11 +205,11 @@ def get_node_name(a): import matplotlib.pyplot as plt import matplotlib.image as mpimg - png = g.create_png() + png = dot.create_png() sio = io.BytesIO(png) img = mpimg.imread(sio) plt.imshow(img, aspect="equal") if show != -1: plt.show() - return g + return dot diff --git a/test/test_plot.py b/test/test_plot.py index 1ba2cc0b..39ad039f 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -59,7 +59,7 @@ def test_plot_formats(pipeline, input_names, outputs, solution, tmp_path): # ...these are not working on my PC, or travis. 
forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() prev_dot = None - for ext in network.supported_plot_formats(): + for ext in plot.supported_plot_formats(): if ext not in forbidden_formats: dot = pipeline.plot(inputs=input_names, outputs=outputs, solution=solution) assert dot @@ -72,7 +72,7 @@ def test_plot_bad_format(pipeline, tmp_path): pipeline.plot(filename="bad.format") ## Check help msg lists all siupported formats - for ext in network.supported_plot_formats(): + for ext in plot.supported_plot_formats(): assert exinfo.match(ext) From 4d250d3d30eb1c1ff70020b4bbce0788fc98ed64 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 00:09:37 +0300 Subject: [PATCH 061/167] DROP(net): list/show layers not needed, repr() is ok --- graphkit/network.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index bb66b570..fcc38823 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -166,21 +166,6 @@ def add_op(self, operation): self.graph.add_edge(operation, DataPlaceholderNode(p)) - def list_layers(self, debug=False): - ## TODO: move to ExecutionPlan - # Make a generic plan. - plan = self.compile() - return [n for n in plan if debug or isinstance(n, Operation)] - - - def show_layers(self, debug=False, ret=False): - """Shows info (name, needs, and provides) about all operations in this dag.""" - s = "\n".join(repr(n) for n in self.list_layers(debug=debug)) - if ret: - return s - else: - print(s) - def _build_execution_steps(self, dag, inputs, outputs): """ Create the list of operation-nodes & *instructions* evaluating all From 18191e4a879f712db9e5d84787beac0d353d84dc Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 01:02:38 +0300 Subject: [PATCH 062/167] ENH(plot,net): +plot() on ExecPlan; +PlotMixin ... 
to avoid copying plot() doc+sig around --- graphkit/base.py | 85 ++++++++++++++++++++++++++++----------------- graphkit/network.py | 57 ++++++++++-------------------- 2 files changed, 71 insertions(+), 71 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index bc1ca99a..7764990d 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -6,6 +6,53 @@ import collections as abc +class PlotMixin(object): + """ + Classes wishing to plot their graphs should inherit this and ... + + implement property ``_plotter`` to return a "partial" callable that somehow + ends up calling :func:`plot.plot_graph()` with the `graph` or any other + args binded appropriately. + The purpose is to avoid copying this function & documentation here around. + """ + + def plot( + self, + filename=None, + show=False, + jupyter=None, + **kws, + ): + """ + :param str filename: + Write diagram into a file. + Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`plot.supported_plot_formats()` for more. + :param show: + If it evaluates to true, opens the diagram in a matplotlib window. + If it equals `-1`, it plots but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). + :param inputs: + an optional name list, any nodes in there are plotted + as a "house" + :param outputs: + an optional name list, any nodes in there are plotted + as an "inverted-house" + :param solution: + an optional dict with values to annotate nodes + (currently content not shown, but node drawn as "filled") + + :return: + A :mod`pydot` instance + + See :func:`graphkit.plot.plot_graph()` for example code and + the legend of the plots. 
+ """ + return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) + + class Data(object): """ This wraps any data that is consumed or produced @@ -149,7 +196,7 @@ def __repr__(self): self.provides) -class NetworkOperation(Operation): +class NetworkOperation(Operation, PlotMixin): def __init__(self, **kwargs): self.net = kwargs.pop('net') Operation.__init__(self, **kwargs) @@ -158,6 +205,10 @@ def __init__(self, **kwargs): self._execution_method = "sequential" self._overwrites_collector = None + @property + def _plotter(self): + return self.net.plot + def _compute(self, named_inputs, outputs=None): return self.net.compute( named_inputs, outputs, method=self._execution_method, @@ -165,7 +216,7 @@ def _compute(self, named_inputs, outputs=None): def __call__(self, *args, **kwargs): return self._compute(*args, **kwargs) - + def compile(self, *args, **kwargs): return self.net.compile(*args, **kwargs) @@ -202,36 +253,6 @@ def set_overwrites_collector(self, collector): % collector) self._overwrites_collector = collector - def plot(self, filename=None, show=False, jupyter=None, - inputs=None, outputs=None, solution=None): - """ - :param str filename: - Write diagram into a file. - Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`plot.supported_plot_formats()` for more. - :param show: - If it evaluates to true, opens the diagram in a matplotlib window. - If it equals `-1`, it plots but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). 
- :param inputs: - an optional name list, any nodes in there are plotted - as a "house" - :param outputs: - an optional name list, any nodes in there are plotted - as an "inverted-house" - :param solution: - an optional dict with values to annotate nodes - (currently content not shown, but node drawn as "filled") - - :return: - An instance of the :mod`pydot` graph - - See :func:`graphkit.plot.plot_graph()` for the plot legend and example code. - """ - return self.net.plot(filename, show, jupyter, inputs, outputs, solution) - def __getstate__(self): state = Operation.__getstate__(self) state['net'] = self.__dict__['net'] diff --git a/graphkit/network.py b/graphkit/network.py index fcc38823..8b42d7cf 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -63,6 +63,7 @@ intermediate *calculated* values that are overwritten by intermediate (aka "pinned") input-values. """ +import functools as fnt import logging import os import time @@ -75,7 +76,7 @@ from boltons.setutils import IndexedSet as iset -from .base import Operation +from .base import Operation, PlotMixin from .modifiers import optional @@ -117,7 +118,7 @@ def __repr__(self): return 'PinInstruction("%s")' % self -class Network(object): +class Network(PlotMixin): """ Assemble operations & data into a directed-acyclic-graph (DAG) to run them. @@ -138,6 +139,12 @@ def __init__(self, **kwargs): #: for debugging purposes. self._last_plan = None + @property + def _plotter(self): + from .plot import plot_graph + + return fnt.partial(plot_graph, graph=self.graph) + def add_op(self, operation): """ Adds the given operation and its data requirements to the network graph @@ -440,44 +447,9 @@ def compute( return solution - def plot(self, filename=None, show=False, jupyter=None, - inputs=None, outputs=None, solution=None): - """ - Plot a *Graphviz* graph and return it, if no other argument provided. - - :param str filename: - Write diagram into a file. 
- Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`plot.supported_plot_formats()` for more. - :param show: - If it evaluates to true, opens the diagram in a matplotlib window. - If it equals `-1``, it plots but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). - :param inputs: - an optional name list, any nodes in there are plotted - as a "house" - :param outputs: - an optional name list, any nodes in there are plotted - as an "inverted-house" - :param solution: - an optional dict with values to annotate nodes - (currently content not shown, but node drawn as "filled") - - :return: - An instance of the :mod`pydot` graph - - See :func:`graphkit.plot.plot_graph()` for the plot legend and example code. - """ - from . import plot - - return plot.plot_graph(self.graph, filename, show, jupyter, - self.steps, inputs, outputs, solution) - - class ExecutionPlan(namedtuple("_ExecPlan", - "net inputs outputs dag broken_edges steps")): + "net inputs outputs dag broken_edges steps"), + PlotMixin): """ The result of the network's compilation phase. @@ -508,6 +480,13 @@ class ExecutionPlan(namedtuple("_ExecPlan", def broken_dag(self): return nx.restricted_view(self.dag, nodes=(), edges=self.broken_edges) + @property + def _plotter(self): + from .plot import plot_graph + + return fnt.partial(plot_graph, graph=self.dag, steps=self.steps, + inputs=self.inputs, outputs=self.outputs) + def get_data_node(self, name): """ Retuen the data node from a graph using its name, or None. 
From 73415296f5811759f6ba052309304d20486e8bf8 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 03:25:13 +0300 Subject: [PATCH 063/167] enh(plan): repr() --- graphkit/network.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/graphkit/network.py b/graphkit/network.py index 8b42d7cf..38fd32a5 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -487,6 +487,11 @@ def _plotter(self): return fnt.partial(plot_graph, graph=self.dag, steps=self.steps, inputs=self.inputs, outputs=self.outputs) + def __repr__(self): + return ( + "ExecutionPlan:\n +--inputs:%s, \n +--outputs=%s\n +--steps=%s)" + % (self.inputs, self.outputs, self.steps)) + def get_data_node(self, name): """ Retuen the data node from a graph using its name, or None. From 231ada5c4287bb46bf8fffa4e8bffd1942d930d0 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 03:27:17 +0300 Subject: [PATCH 064/167] refact(plot): reorder formats-list with io --- graphkit/plot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index bed110d8..bef42b60 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -8,13 +8,6 @@ from .modifiers import optional -def supported_plot_formats(): - """return automatically all `pydot` extensions withlike ``.png``""" - import pydot - - return [".%s" % f for f in pydot.Dot().formats] - - def build_pydot(graph, steps=None, inputs=None, outputs=None, solution=None): """ Build a Graphviz graph """ import pydot @@ -99,6 +92,13 @@ def get_node_name(a): return dot +def supported_plot_formats(): + """return automatically all `pydot` extensions withlike ``.png``""" + import pydot + + return [".%s" % f for f in pydot.Dot().formats] + + def plot_graph( graph, filename=None, From 80f110abb82def3b6ccac08c0f3c76fb2e64bb64 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 03:28:42 +0300 Subject: [PATCH 065/167] FIX(plot): distinguish Del/Pin cmds; 
+choice utils - still x4 TCs fail... --- graphkit/plot.py | 51 +++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index bef42b60..95a6c9e5 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -6,6 +6,16 @@ from .base import NetworkOperation, Operation from .modifiers import optional +from .network import DeleteInstruction, PinInstruction + + +def _is_class_value_in_list(lst, cls, value): + return any(isinstance(i, cls) and i == value for i in lst) + + +def _merge_conditions(*conds): + """combines conditions as a choice in binary range, eg, 2 conds --> [0, 3]""" + return sum(int(bool(c)) << i for i, c in enumerate(conds)) def build_pydot(graph, steps=None, inputs=None, outputs=None, solution=None): @@ -23,29 +33,31 @@ def get_node_name(a): # draw nodes for nx_node in graph.nodes: - kw = {} if isinstance(nx_node, str): - # Only DeleteInstructions data in steps. + kw = {} + # FrameColor change by step type if nx_node in steps: - kw = {"color": "red", "penwidth": 2} - - # SHAPE change if in inputs/outputs. + choice = _merge_conditions( + _is_class_value_in_list(steps, DeleteInstruction, nx_node), + _is_class_value_in_list(steps, PinInstruction, nx_node), + ) + # 0 is singled out because `nx_node` exists in `steps`. + color = "NOPE red blue purple".split()[choice] + kw = {"color": color, "penwidth": 2} + + # SHAPE change if with inputs/outputs. # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html - shape = "rect" - if inputs and outputs and nx_node in inputs and nx_node in outputs: - shape = "hexagon" - else: - if inputs and nx_node in inputs: - shape = "invhouse" - if outputs and nx_node in outputs: - shape = "house" - - # LABEL change from solution. + choice = _merge_conditions( + inputs and nx_node in inputs, outputs and nx_node in outputs + ) + shape = "rect invhouse house hexagon".split()[choice] + + # LABEL change with solution. 
if solution and nx_node in solution: kw["style"] = "filled" kw["fillcolor"] = "gray" # kw["tooltip"] = nx_node, solution.get(nx_node) - node = pydot.Node(name=nx_node, shape=shape, URL="fdgfdf", **kw) + node = pydot.Node(name=nx_node, shape=shape, **kw) else: # Operation kw = {} shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" @@ -144,7 +156,7 @@ def plot_graph( If it evaluates to true, opens the diagram in a matplotlib window. If it equals `-1``, it plots but does not open the Window. :param jupyter: - If it evaluates to true, return an SVG suitable to render + If it evaluates to true, return an SVG suitable to render in *jupyter notebook cells* (`ipython` must be installed). :param steps: a list of nodes & instructions to overlay on the diagram @@ -174,8 +186,11 @@ def plot_graph( >>> inputs = {'a': 1, 'b1': 2} >>> solution=pipeline(inputs) - >>> pipeline.plot('plot.svg', inputs=inputs, solution=solution, outputs=['asked', 'b1']); + >>> pipeline.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); + >>> pipeline.last_plan.plot('plot2.svg', solution=solution); + + The last 2 should plot identical graph diagrams. """ dot = build_pydot(graph, steps, inputs, outputs, solution) From 32eaa8019799baaf7bf786c7671ff13fda8e2ef3 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 03:45:17 +0300 Subject: [PATCH 066/167] refact(plot): inline imports, not to cycle with base... to bring back PlotMixin to change together stuff. 
--- graphkit/plot.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index 95a6c9e5..39b9fcc0 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -4,10 +4,6 @@ import io import os -from .base import NetworkOperation, Operation -from .modifiers import optional -from .network import DeleteInstruction, PinInstruction - def _is_class_value_in_list(lst, cls, value): return any(isinstance(i, cls) and i == value for i in lst) @@ -21,6 +17,9 @@ def _merge_conditions(*conds): def build_pydot(graph, steps=None, inputs=None, outputs=None, solution=None): """ Build a Graphviz graph """ import pydot + from .base import NetworkOperation, Operation + from .modifiers import optional + from .network import DeleteInstruction, PinInstruction assert graph is not None From 4b70cfb10295d774afc6f52b56a388220357aad5 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 10:40:49 +0300 Subject: [PATCH 067/167] refact(plot): move PlotMixin base-->plot module to group edit --- graphkit/base.py | 49 ++------------------------------------------- graphkit/network.py | 9 +++++---- graphkit/plot.py | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 51 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 7764990d..a6bde1d2 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -5,52 +5,7 @@ except ImportError: import collections as abc - -class PlotMixin(object): - """ - Classes wishing to plot their graphs should inherit this and ... - - implement property ``_plotter`` to return a "partial" callable that somehow - ends up calling :func:`plot.plot_graph()` with the `graph` or any other - args binded appropriately. - The purpose is to avoid copying this function & documentation here around. - """ - - def plot( - self, - filename=None, - show=False, - jupyter=None, - **kws, - ): - """ - :param str filename: - Write diagram into a file. 
- Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`plot.supported_plot_formats()` for more. - :param show: - If it evaluates to true, opens the diagram in a matplotlib window. - If it equals `-1`, it plots but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). - :param inputs: - an optional name list, any nodes in there are plotted - as a "house" - :param outputs: - an optional name list, any nodes in there are plotted - as an "inverted-house" - :param solution: - an optional dict with values to annotate nodes - (currently content not shown, but node drawn as "filled") - - :return: - A :mod`pydot` instance - - See :func:`graphkit.plot.plot_graph()` for example code and - the legend of the plots. - """ - return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) +from . import plot class Data(object): @@ -196,7 +151,7 @@ def __repr__(self): self.provides) -class NetworkOperation(Operation, PlotMixin): +class NetworkOperation(Operation, plot.PlotMixin): def __init__(self, **kwargs): self.net = kwargs.pop('net') Operation.__init__(self, **kwargs) diff --git a/graphkit/network.py b/graphkit/network.py index 38fd32a5..a4987e35 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -73,10 +73,11 @@ from io import StringIO from itertools import chain - +import networkx as nx from boltons.setutils import IndexedSet as iset -from .base import Operation, PlotMixin +from . import plot +from .base import Operation from .modifiers import optional @@ -118,7 +119,7 @@ def __repr__(self): return 'PinInstruction("%s")' % self -class Network(PlotMixin): +class Network(plot.PlotMixin): """ Assemble operations & data into a directed-acyclic-graph (DAG) to run them. 
@@ -449,7 +450,7 @@ def compute( class ExecutionPlan(namedtuple("_ExecPlan", "net inputs outputs dag broken_edges steps"), - PlotMixin): + plot.PlotMixin): """ The result of the network's compilation phase. diff --git a/graphkit/plot.py b/graphkit/plot.py index 39b9fcc0..df10b3d3 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -14,6 +14,47 @@ def _merge_conditions(*conds): return sum(int(bool(c)) << i for i, c in enumerate(conds)) +class PlotMixin(object): + """ + Classes wishing to plot their graphs should inherit this and ... + + implement property ``_plotter`` to return a "partial" callable that somehow + ends up calling :func:`plot.plot_graph()` with the `graph` or any other + args binded appropriately. + The purpose is to avoid copying this function & documentation here around. + """ + + def plot(self, filename=None, show=False, jupyter=None, **kws): + """ + :param str filename: + Write diagram into a file. + Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`plot.supported_plot_formats()` for more. + :param show: + If it evaluates to true, opens the diagram in a matplotlib window. + If it equals `-1`, it plots but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). + :param inputs: + an optional name list, any nodes in there are plotted + as a "house" + :param outputs: + an optional name list, any nodes in there are plotted + as an "inverted-house" + :param solution: + an optional dict with values to annotate nodes + (currently content not shown, but node drawn as "filled") + + :return: + A :mod`pydot` instance + + See :func:`graphkit.plot.plot_graph()` for example code and + the legend of the plots. 
+ """ + return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) + + def build_pydot(graph, steps=None, inputs=None, outputs=None, solution=None): """ Build a Graphviz graph """ import pydot @@ -227,3 +268,4 @@ def plot_graph( plt.show() return dot + From b4fa5e0b511e73c9dd9b5bae04701f705690d1af Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 10:52:46 +0300 Subject: [PATCH 068/167] enh(plot): +`title` arg at the bottom --- graphkit/plot.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index df10b3d3..7b800fa4 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -45,6 +45,8 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): :param solution: an optional dict with values to annotate nodes (currently content not shown, but node drawn as "filled") + :param title: + an optional string to display at the bottom of the graph :return: A :mod`pydot` instance @@ -55,7 +57,10 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) -def build_pydot(graph, steps=None, inputs=None, outputs=None, solution=None): +def build_pydot( + graph, steps=None, inputs=None, outputs=None, solution=None, + title=None +): """ Build a Graphviz graph """ import pydot from .base import NetworkOperation, Operation @@ -69,7 +74,7 @@ def get_node_name(a): return a.name return a - dot = pydot.Dot(graph_type="digraph") + dot = pydot.Dot(graph_type="digraph", label=title, fontname="italic") # draw nodes for nx_node in graph.nodes: @@ -160,6 +165,7 @@ def plot_graph( inputs=None, outputs=None, solution=None, + title=None, ): """ Plot a *Graphviz* graph/steps and return it, if no other argument provided. @@ -232,7 +238,7 @@ def plot_graph( The last 2 should plot identical graph diagrams. 
""" - dot = build_pydot(graph, steps, inputs, outputs, solution) + dot = build_pydot(graph, steps, inputs, outputs, solution, title) # Save plot # From c6f215549a0c473246b8011b7c76ddad50e247b5 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 10:56:45 +0300 Subject: [PATCH 069/167] FIX(plot): failing if steps not a list/ is none --- graphkit/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index 7b800fa4..06e8a510 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -81,7 +81,7 @@ def get_node_name(a): if isinstance(nx_node, str): kw = {} # FrameColor change by step type - if nx_node in steps: + if steps and nx_node in steps: choice = _merge_conditions( _is_class_value_in_list(steps, DeleteInstruction, nx_node), _is_class_value_in_list(steps, PinInstruction, nx_node), From ec69090b1cc0e6a1ecab6472e5a295f9fb440f84 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 11:13:06 +0300 Subject: [PATCH 070/167] ENH(plan,plot): executed operations drawn as filled + enh(plan): also sequential execution collects accounts executed. + refact(plan): executed_nodes-->executed. 
--- graphkit/network.py | 52 ++++++++++++++++++++++----------------------- graphkit/plot.py | 19 ++++++++++++----- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index a4987e35..b45a1849 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -67,8 +67,6 @@ import logging import os import time -import networkx as nx - from collections import defaultdict, namedtuple from io import StringIO from itertools import chain @@ -80,7 +78,6 @@ from .base import Operation from .modifiers import optional - log = logging.getLogger(__name__) class DataPlaceholderNode(str): @@ -392,6 +389,7 @@ def compile(self, inputs=(), outputs=()): pruned_dag, tuple(broken_edges), tuple(steps), + executed=iset(), ) # Cache compilation results to speed up future runs @@ -448,9 +446,10 @@ def compute( return solution -class ExecutionPlan(namedtuple("_ExecPlan", - "net inputs outputs dag broken_edges steps"), - plot.PlotMixin): +class ExecutionPlan( + namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps executed"), + plot.PlotMixin +): """ The result of the network's compilation phase. @@ -476,6 +475,8 @@ class ExecutionPlan(namedtuple("_ExecPlan", The tuple of operation-nodes & *instructions* needed to evaluate the given inputs & asked outputs, free memory and avoid overwritting any given intermediate inputs. + :ivar executed: + An empty set to collect all operations that have been executed so far. 
""" @property def broken_dag(self): @@ -483,10 +484,14 @@ def broken_dag(self): @property def _plotter(self): - from .plot import plot_graph - - return fnt.partial(plot_graph, graph=self.dag, steps=self.steps, - inputs=self.inputs, outputs=self.outputs) + return fnt.partial( + plot.plot_graph, + graph=self.dag, + steps=self.steps, + inputs=self.inputs, + outputs=self.outputs, + executed=self.executed, + ) def __repr__(self): return ( @@ -501,7 +506,7 @@ def get_data_node(self, name): if isinstance(node, DataPlaceholderNode): return node - def _can_schedule_operation(self, op, executed_nodes): + def _can_schedule_operation(self, op): """ Determines if a Operation is ready to be scheduled for execution @@ -509,8 +514,6 @@ def _can_schedule_operation(self, op, executed_nodes): :param op: The Operation object to check - :param set executed_nodes - A set containing all operations that have been executed so far :return: A boolean indicating whether the operation may be scheduled for execution based on what has already been executed. @@ -519,16 +522,14 @@ def _can_schedule_operation(self, op, executed_nodes): # regardless of whether their producers have yet to run. dependencies = set(n for n in nx.ancestors(self.broken_dag, op) if isinstance(n, Operation)) - return dependencies.issubset(executed_nodes) + return dependencies.issubset(self.executed) - def _can_evict_value(self, name, executed_nodes): + def _can_evict_value(self, name): """ Determines if a DataPlaceholderNode is ready to be deleted from solution. :param name: The name of the data node to check - :param executed_nodes: set - A set containing all operations that have been executed so far :return: A boolean indicating whether the data node can be deleted or not. """ @@ -536,7 +537,7 @@ def _can_evict_value(self, name, executed_nodes): # Use `broken_dag` not to block a successor waiting for this data, # since in any case will use a given input, not some pipe of this data. 
return data_node and set( - self.broken_dag.successors(data_node)).issubset(executed_nodes) + self.broken_dag.successors(data_node)).issubset(self.executed) def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): value_name = str(value_name) @@ -559,10 +560,6 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, self.net._thread_pool = Pool(thread_pool_size) pool = self.net._thread_pool - - # this keeps track of all nodes that have already executed - executed_nodes = set() # unordered, not iterated - # with each loop iteration, we determine a set of operations that can be # scheduled, then schedule them onto a thread pool, then collect their # results onto a memory solution for use upon the next iteration. @@ -574,8 +571,8 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, for node in self.steps: if ( isinstance(node, Operation) - and self._can_schedule_operation(node, executed_nodes) - and node not in executed_nodes + and self._can_schedule_operation(node) + and node not in self.executed ): upnext.append(node) elif isinstance(node, DeleteInstruction): @@ -584,7 +581,7 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, # An optional need may not have a value in the solution. 
if ( node in solution - and self._can_evict_value(node, executed_nodes) + and self._can_evict_value(node) ): log.debug("removing data '%s' from solution.", node) del solution[node] @@ -606,7 +603,7 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, upnext) for op, result in done_iterator: solution.update(result) - executed_nodes.add(op) + self.executed.add(op) def _execute_sequential_method(self, inputs, solution, overwrites): @@ -628,6 +625,7 @@ def _execute_sequential_method(self, inputs, solution, overwrites): # add outputs to solution solution.update(layer_outputs) + self.executed.add(step) # record execution time t_complete = round(time.time() - t0, 5) @@ -657,6 +655,8 @@ def execute(self, solution, overwrites=None, method=None): because they were "pinned" by input vaules. If missing, the overwrites values are simply discarded. """ + # Clean executed operation from any previous execution. + self.executed.clear() # choose a method of execution executor = (self._execute_thread_pool_barrier_method diff --git a/graphkit/plot.py b/graphkit/plot.py index 06e8a510..c073bef6 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -45,6 +45,8 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): :param solution: an optional dict with values to annotate nodes (currently content not shown, but node drawn as "filled") + :param executed: + an optional container with operations executed, drawn "filled" :param title: an optional string to display at the bottom of the graph @@ -58,8 +60,13 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): def build_pydot( - graph, steps=None, inputs=None, outputs=None, solution=None, - title=None + graph, + steps=None, + inputs=None, + outputs=None, + solution=None, + executed=None, + title=None, ): """ Build a Graphviz graph """ import pydot @@ -106,8 +113,9 @@ def get_node_name(a): else: # Operation kw = {} shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" - if 
nx_node in steps: - kw["style"] = "bold" + if executed and nx_node in executed: + kw["style"] = "filled" + kw["fillcolor"] = "gray" node = pydot.Node(name=nx_node.name, shape=shape, **kw) dot.add_node(node) @@ -165,6 +173,7 @@ def plot_graph( inputs=None, outputs=None, solution=None, + executed=None, title=None, ): """ @@ -238,7 +247,7 @@ def plot_graph( The last 2 should plot identical graph diagrams. """ - dot = build_pydot(graph, steps, inputs, outputs, solution, title) + dot = build_pydot(graph, steps, inputs, outputs, solution, executed, title) # Save plot # From 24a3d1e85900ab1512ca4516f27f5217327c48c6 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 15:36:41 +0300 Subject: [PATCH 071/167] FIX(net): revived last_plan was never set (HEAD~13: 64e0028) --- graphkit/network.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index b45a1849..da43c39f 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -133,9 +133,9 @@ def __init__(self, **kwargs): #: that is occuring when accessing the dag in networkx. self._cached_plans = {} - #: the execution_plan of the last call to :meth:`compile()`, - #: for debugging purposes. - self._last_plan = None + #: the execution_plan of the last call to :meth:`compute()` + #: (not ``compile()``!), for debugging purposes. + self.last_plan = None @property def _plotter(self): @@ -429,7 +429,7 @@ def compute( "The outputs argument must be a list" # Build the execution plan. - plan = self.compile(named_inputs.keys(), outputs) + self.last_plan = plan = self.compile(named_inputs.keys(), outputs) # start with fresh data solution. 
solution = dict(named_inputs) From 77fc887a6abb11c6a707b95b523fd156a8f984c7 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 15:47:20 +0300 Subject: [PATCH 072/167] refact(plot): reodred build-dot utils above use --- graphkit/plot.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index c073bef6..16f6f4cd 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -5,15 +5,6 @@ import os -def _is_class_value_in_list(lst, cls, value): - return any(isinstance(i, cls) and i == value for i in lst) - - -def _merge_conditions(*conds): - """combines conditions as a choice in binary range, eg, 2 conds --> [0, 3]""" - return sum(int(bool(c)) << i for i, c in enumerate(conds)) - - class PlotMixin(object): """ Classes wishing to plot their graphs should inherit this and ... @@ -59,6 +50,15 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) +def _is_class_value_in_list(lst, cls, value): + return any(isinstance(i, cls) and i == value for i in lst) + + +def _merge_conditions(*conds): + """combines conditions as a choice in binary range, eg, 2 conds --> [0, 3]""" + return sum(int(bool(c)) << i for i, c in enumerate(conds)) + + def build_pydot( graph, steps=None, From da087df75a9b4ae200cd9a2335aafc1e201d9f3a Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 16:00:58 +0300 Subject: [PATCH 073/167] doc(plot): move centrally all API doc on PlotMixin; Style --- graphkit/plot.py | 123 ++++++++++++++++++++-------------------------- test/test_plot.py | 1 - 2 files changed, 52 insertions(+), 72 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index 16f6f4cd..24f5e35e 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -34,7 +34,7 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): an optional name list, any nodes in there are plotted as an 
"inverted-house" :param solution: - an optional dict with values to annotate nodes + an optional dict with values to annotate nodes, drawn "filled" (currently content not shown, but node drawn as "filled") :param executed: an optional container with operations executed, drawn "filled" @@ -44,8 +44,50 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): :return: A :mod`pydot` instance - See :func:`graphkit.plot.plot_graph()` for example code and - the legend of the plots. + Note that the `graph` argument is absent - Each PlotMixin provides + its own graph internally; use directly :func:`plot_graph()` to provide + a different graph. + + **Legend:** + + NODES: + + - **circle**: function + - **oval**: subgraph function + - **house**: given input + - **inversed-house**: asked output + - **polygon**: given both as input & asked as output (what?) + - **square**: intermediate data, neither given nor asked. + - **red frame**: delete-instruction, to free up memory. + - **filled**: data node has a value in `solution`, shown in tooltip. + - **thick frame**: function/data node visited. + + ARROWS + + - **solid black arrows**: dependencies (source-data are``need``\ed + by target-operations, sources-operations ``provide`` target-data) + - **dashed black arrows**: optional needs + - **green-dotted arrows**: execution steps labeled in succession + + :return: + An instance of the :mod`pydot` graph + + **Sampole code:** + + >>> from graphkit import compose, operation + >>> from graphkit.modifiers import optional + + >>> pipeline = compose(name="pipeline")( + ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), + ... operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), + ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), + ... 
) + + >>> inputs = {'a': 1, 'b1': 2} + >>> solution=pipeline(inputs) + + >>> pipeline.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); + >>> pipeline.last_plan.plot('plot2.svg', solution=solution); """ return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) @@ -87,6 +129,7 @@ def get_node_name(a): for nx_node in graph.nodes: if isinstance(nx_node, str): kw = {} + # FrameColor change by step type if steps and nx_node in steps: choice = _merge_conditions( @@ -108,10 +151,11 @@ def get_node_name(a): if solution and nx_node in solution: kw["style"] = "filled" kw["fillcolor"] = "gray" - # kw["tooltip"] = nx_node, solution.get(nx_node) + # kw["tooltip"] = str(solution.get(nx_node)) # not working :-() node = pydot.Node(name=nx_node, shape=shape, **kw) else: # Operation kw = {} + shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" if executed and nx_node in executed: kw["style"] = "filled" @@ -179,73 +223,11 @@ def plot_graph( """ Plot a *Graphviz* graph/steps and return it, if no other argument provided. - Legend: - - - NODES: - - - **circle**: function - - **oval**: subgraph function - - **house**: given input - - **inversed-house**: asked output - - **polygon**: given both as input & asked as output (what?) - - **square**: intermediate data, neither given nor asked. - - **red frame**: delete-instruction, to free up memory. - - **filled**: data node has a value in `solution`, shown in tooltip. - - **thick frame**: function/data node visited. - - ARROWS - - - **solid black arrows**: dependencies (source-data are``need``\ed - by target-operations, sources-operations ``provide`` target-data) - - **dashed black arrows**: optional needs - - **green-dotted arrows**: execution steps labeled in succession - :param graph: - what to plot - :param str filename: - Write diagram into a file. - Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` - call :func:`plot.supported_plot_formats()` for more. 
- :param show: - If it evaluates to true, opens the diagram in a matplotlib window. - If it equals `-1``, it plots but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). - :param steps: - a list of nodes & instructions to overlay on the diagram - :param inputs: - an optional name list, any nodes in there are plotted - as a "house" - :param outputs: - an optional name list, any nodes in there are plotted - as an "inverted-house" - :param solution: - an optional dict with values to annotate nodes - (currently content not shown, but node drawn as "filled") - - :return: - An instance of the :mod`pydot` graph - - **Example:** - - >>> from graphkit import compose, operation - >>> from graphkit.modifiers import optional - - >>> pipeline = compose(name="pipeline")( - ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), - ... operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), - ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), - ... ) - - >>> inputs = {'a': 1, 'b1': 2} - >>> solution=pipeline(inputs) - - >>> pipeline.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); - >>> pipeline.last_plan.plot('plot2.svg', solution=solution); - - The last 2 should plot identical graph diagrams. + the base graph to plot + + See :func:`PlotMixin()` for the rest arguments, sample code, and + the legend of the plots. 
""" dot = build_pydot(graph, steps, inputs, outputs, solution, executed, title) @@ -283,4 +265,3 @@ def plot_graph( plt.show() return dot - diff --git a/test/test_plot.py b/test/test_plot.py index 39ad039f..33fb04b6 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -48,7 +48,6 @@ def solution(pipeline, inputs, outputs, request): def test_plotting_docstring(): common_formats = ".png .dot .jpg .jpeg .pdf .svg".split() for ext in common_formats: - assert ext in plot.plot_graph.__doc__ assert ext in base.NetworkOperation.plot.__doc__ assert ext in network.Network.plot.__doc__ From ac73bbba68d258d1be262a5a6bdfb5ea9c969f63 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 16:06:59 +0300 Subject: [PATCH 074/167] FEAT(plot,plan): +yellow broken links on original graph --- graphkit/network.py | 3 ++- graphkit/plot.py | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index da43c39f..2a4ed86d 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -486,11 +486,12 @@ def broken_dag(self): def _plotter(self): return fnt.partial( plot.plot_graph, - graph=self.dag, + graph=self.net.graph, steps=self.steps, inputs=self.inputs, outputs=self.outputs, executed=self.executed, + edge_props={e: {'color':'yellow'} for e in self.broken_edges}, ) def __repr__(self): diff --git a/graphkit/plot.py b/graphkit/plot.py index 24f5e35e..0adbbd01 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -2,9 +2,13 @@ # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. import io +import logging import os +log = logging.getLogger(__name__) + + class PlotMixin(object): """ Classes wishing to plot their graphs should inherit this and ... 
@@ -40,6 +44,10 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): an optional container with operations executed, drawn "filled" :param title: an optional string to display at the bottom of the graph + :param node_props: + an optional nested dict of Grapvhiz attributes for certain nodes + :param edge_props: + an optional nested dict of Grapvhiz attributes for certain edges :return: A :mod`pydot` instance @@ -101,6 +109,19 @@ def _merge_conditions(*conds): return sum(int(bool(c)) << i for i, c in enumerate(conds)) +def _apply_user_props(dotobj, user_props, key): + if user_props and key in user_props: + dotobj.get_attributes().update(user_props[key]) + # Delete it, to report unmatched ones, AND not to annotate `steps`. + del user_props[key] + + +def _report_unmatched_user_props(user_props, kind): + if user_props and log.isEnabledFor(logging.WARNING): + unmatched = "\n ".join(str(i) for i in user_props.items()) + log.warning("Unmatched `%s_props`:\n +--%s", kind, unmatched) + + def build_pydot( graph, steps=None, @@ -109,6 +130,8 @@ def build_pydot( solution=None, executed=None, title=None, + node_props=None, + edge_props=None, ): """ Build a Graphviz graph """ import pydot @@ -162,8 +185,12 @@ def get_node_name(a): kw["fillcolor"] = "gray" node = pydot.Node(name=nx_node.name, shape=shape, **kw) + _apply_user_props(node, node_props, key=node.get_name()) + dot.add_node(node) + _report_unmatched_user_props(node_props, "node") + # draw edges for src, dst in graph.edges: src_name = get_node_name(src) @@ -174,8 +201,13 @@ def get_node_name(a): ): kw["style"] = "dashed" edge = pydot.Edge(src=src_name, dst=dst_name, **kw) + + _apply_user_props(edge, edge_props, key=(src, dst)) + dot.add_edge(edge) + _report_unmatched_user_props(edge_props, "edge") + # draw steps sequence if steps and len(steps) > 1: it1 = iter(steps) @@ -219,6 +251,8 @@ def plot_graph( solution=None, executed=None, title=None, + node_props=None, + edge_props=None, ): """ Plot a *Graphviz* 
graph/steps and return it, if no other argument provided. @@ -229,7 +263,9 @@ def plot_graph( See :func:`PlotMixin()` for the rest arguments, sample code, and the legend of the plots. """ - dot = build_pydot(graph, steps, inputs, outputs, solution, executed, title) + dot = build_pydot( + graph, steps, inputs, outputs, solution, executed, title, node_props, edge_props + ) # Save plot # From c2829a3cb71b0ea3d4fbd5bffd315046b84ad895 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 16:15:11 +0300 Subject: [PATCH 075/167] refact(plot): PlotMixin --> Plotter, _plotter() --> _plot() --- graphkit/base.py | 4 ++-- graphkit/network.py | 8 ++++---- graphkit/plot.py | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index a6bde1d2..893be45a 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -151,7 +151,7 @@ def __repr__(self): self.provides) -class NetworkOperation(Operation, plot.PlotMixin): +class NetworkOperation(Operation, plot.Plotter): def __init__(self, **kwargs): self.net = kwargs.pop('net') Operation.__init__(self, **kwargs) @@ -161,7 +161,7 @@ def __init__(self, **kwargs): self._overwrites_collector = None @property - def _plotter(self): + def _plot(self): return self.net.plot def _compute(self, named_inputs, outputs=None): diff --git a/graphkit/network.py b/graphkit/network.py index 2a4ed86d..2f8822fa 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -116,7 +116,7 @@ def __repr__(self): return 'PinInstruction("%s")' % self -class Network(plot.PlotMixin): +class Network(plot.Plotter): """ Assemble operations & data into a directed-acyclic-graph (DAG) to run them. 
@@ -138,7 +138,7 @@ def __init__(self, **kwargs): self.last_plan = None @property - def _plotter(self): + def _plot(self): from .plot import plot_graph return fnt.partial(plot_graph, graph=self.graph) @@ -448,7 +448,7 @@ def compute( class ExecutionPlan( namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps executed"), - plot.PlotMixin + plot.Plotter ): """ The result of the network's compilation phase. @@ -483,7 +483,7 @@ def broken_dag(self): return nx.restricted_view(self.dag, nodes=(), edges=self.broken_edges) @property - def _plotter(self): + def _plot(self): return fnt.partial( plot.plot_graph, graph=self.net.graph, diff --git a/graphkit/plot.py b/graphkit/plot.py index 0adbbd01..e1219c60 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -9,11 +9,11 @@ log = logging.getLogger(__name__) -class PlotMixin(object): +class Plotter(object): """ Classes wishing to plot their graphs should inherit this and ... - implement property ``_plotter`` to return a "partial" callable that somehow + implement property ``_plot`` to return a "partial" callable that somehow ends up calling :func:`plot.plot_graph()` with the `graph` or any other args binded appropriately. The purpose is to avoid copying this function & documentation here around. @@ -52,7 +52,7 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): :return: A :mod`pydot` instance - Note that the `graph` argument is absent - Each PlotMixin provides + Note that the `graph` argument is absent - Each Plotter provides its own graph internally; use directly :func:`plot_graph()` to provide a different graph. 
@@ -97,7 +97,7 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): >>> pipeline.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); >>> pipeline.last_plan.plot('plot2.svg', solution=solution); """ - return self._plotter(filename=filename, show=show, jupyter=jupyter, **kws) + return self._plot(filename=filename, show=show, jupyter=jupyter, **kws) def _is_class_value_in_list(lst, cls, value): @@ -260,7 +260,7 @@ def plot_graph( :param graph: the base graph to plot - See :func:`PlotMixin()` for the rest arguments, sample code, and + See :meth:`Plotter.plot()` for the rest arguments, sample code, and the legend of the plots. """ dot = build_pydot( From c61947b23bab0212dd8abdb5a95e7787a6ac5e07 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 17:29:42 +0300 Subject: [PATCH 076/167] TEST(PLOT): Check also ExecPlan --- test/test_plot.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/test/test_plot.py b/test/test_plot.py index 33fb04b6..58eaed32 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -52,18 +52,32 @@ def test_plotting_docstring(): assert ext in network.Network.plot.__doc__ -def test_plot_formats(pipeline, input_names, outputs, solution, tmp_path): +def test_plot_formats(pipeline, input_names, inputs, outputs, tmp_path): ## Generate all formats (not needing to save files) + # run it here (and not in ficture) to ansure `last_plan` exists. + solution = pipeline(inputs, outputs) + # ...these are not working on my PC, or travis. forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() - prev_dot = None + prev_dot1 = prev_dot2 = None for ext in plot.supported_plot_formats(): if ext not in forbidden_formats: - dot = pipeline.plot(inputs=input_names, outputs=outputs, solution=solution) - assert dot - assert ext == ".jpg" or dot != prev_dot - prev_dot = dot + # Check Network. 
+ # + dot1 = pipeline.plot(inputs=input_names, outputs=outputs, solution=solution) + assert dot1 + assert ext == ".jpg" or dot1 != prev_dot1 + prev_dot1 = dot1 + + # Check ExecutionPlan. + # + dot2 = pipeline.net.last_plan.plot( + inputs=input_names, outputs=outputs, solution=solution + ) + assert dot2 + assert ext == ".jpg" or dot2 != prev_dot2 + prev_dot2 = dot2 def test_plot_bad_format(pipeline, tmp_path): @@ -78,14 +92,13 @@ def test_plot_bad_format(pipeline, tmp_path): def test_plot_write_file(pipeline, tmp_path): # Try saving a file from one format. - fpath = tmp_path / "workflow.png" - - dot = pipeline.plot(str(fpath)) + fpath = tmp_path / "network.png" + dot1 = pipeline.plot(str(fpath)) assert fpath.exists() - assert dot + assert dot1 -def test_plot_matplib(pipeline, tmp_path): +def test_plot_matpotlib(pipeline, tmp_path): ## Try matplotlib Window, but # without opening a Window. if sys.version_info < (3, 5): From 434d4e2cb10843740155e8a664e65920bfe02798 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 17:37:18 +0300 Subject: [PATCH 077/167] REFACT(PLOT): Plotter builds dot & renders... ... based on Law of Demeter simplify kwargs, defined in one place. + enh: plotters "suggest" kwargs, possinly to override them (no duplcate arg when attempted). 
--- graphkit/base.py | 6 ++-- graphkit/network.py | 36 +++++++++++++----------- graphkit/plot.py | 67 ++++++++++++++++++++++++--------------------- test/test_plot.py | 4 ++- 4 files changed, 62 insertions(+), 51 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 893be45a..3e3381ed 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -160,9 +160,9 @@ def __init__(self, **kwargs): self._execution_method = "sequential" self._overwrites_collector = None - @property - def _plot(self): - return self.net.plot + def _build_pydot(self, **kws): + """delegate to network""" + return self.net._build_pydot(**kws) def _compute(self, named_inputs, outputs=None): return self.net.compute( diff --git a/graphkit/network.py b/graphkit/network.py index 2f8822fa..656244f0 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -63,7 +63,6 @@ intermediate *calculated* values that are overwritten by intermediate (aka "pinned") input-values. """ -import functools as fnt import logging import os import time @@ -137,11 +136,12 @@ def __init__(self, **kwargs): #: (not ``compile()``!), for debugging purposes. self.last_plan = None - @property - def _plot(self): - from .plot import plot_graph + def _build_pydot(self, **kws): + from .plot import build_pydot + + kws.setdefault('graph', self.graph) - return fnt.partial(plot_graph, graph=self.graph) + return build_pydot(**kws) def add_op(self, operation): """ @@ -478,21 +478,25 @@ class ExecutionPlan( :ivar executed: An empty set to collect all operations that have been executed so far. 
""" + @property def broken_dag(self): return nx.restricted_view(self.dag, nodes=(), edges=self.broken_edges) - @property - def _plot(self): - return fnt.partial( - plot.plot_graph, - graph=self.net.graph, - steps=self.steps, - inputs=self.inputs, - outputs=self.outputs, - executed=self.executed, - edge_props={e: {'color':'yellow'} for e in self.broken_edges}, - ) + def _build_pydot(self, **kws): + from .plot import build_pydot + + mykws = { + "graph": self.net.graph, + "steps": self.steps, + "inputs": self.inputs, + "outputs": self.outputs, + "executed": self.executed, + "edge_props": {e: {"color": "yellow"} for e in self.broken_edges}, + } + mykws.update(kws) + + return build_pydot(**mykws) def __repr__(self): return ( diff --git a/graphkit/plot.py b/graphkit/plot.py index e1219c60..f9ee24ae 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -78,9 +78,10 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): - **green-dotted arrows**: execution steps labeled in succession :return: - An instance of the :mod`pydot` graph + An instance of the :mod`pydot` graph or whatever rendered + (e.g. jupyter SVG or matplotlib image) - **Sampole code:** + **Sample code:** >>> from graphkit import compose, operation >>> from graphkit.modifiers import optional @@ -97,7 +98,8 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): >>> pipeline.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); >>> pipeline.last_plan.plot('plot2.svg', solution=solution); """ - return self._plot(filename=filename, show=show, jupyter=jupyter, **kws) + dot = self._build_pydot(**kws) + return render_pydot(dot, filename=filename, show=show, jupyter=jupyter) def _is_class_value_in_list(lst, cls, value): @@ -133,7 +135,12 @@ def build_pydot( node_props=None, edge_props=None, ): - """ Build a Graphviz graph """ + """ + Build a *Graphviz* graph/steps/inputs/outputs and return it. 
+ + See :meth:`Plotter.plot()` for the arguments, sample code, and + the legend of the plots. + """ import pydot from .base import NetworkOperation, Operation from .modifiers import optional @@ -240,33 +247,29 @@ def supported_plot_formats(): return [".%s" % f for f in pydot.Dot().formats] -def plot_graph( - graph, - filename=None, - show=False, - jupyter=False, - steps=None, - inputs=None, - outputs=None, - solution=None, - executed=None, - title=None, - node_props=None, - edge_props=None, -): +def render_pydot(dot, filename=None, show=False, jupyter=False): """ - Plot a *Graphviz* graph/steps and return it, if no other argument provided. - - :param graph: - the base graph to plot - - See :meth:`Plotter.plot()` for the rest arguments, sample code, and - the legend of the plots. + Plot a *Graphviz* dot in a matplotlib, in file or return it for Jupyter. + + :param dot: + the pre-built *Graphviz* dot instance + :param str filename: + Write diagram into a file. + Common extensions are ``.png .dot .jpg .jpeg .pdf .svg`` + call :func:`plot.supported_plot_formats()` for more. + :param show: + If it evaluates to true, opens the diagram in a matplotlib window. + If it equals `-1`, it returns the image but does not open the Window. + :param jupyter: + If it evaluates to true, return an SVG suitable to render + in *jupyter notebook cells* (`ipython` must be installed). + + :return: + the matplotlib image if ``show=-1``, the SVG for Jupyter if ``jupyter=true``, + or `dot`. + + See :meth:`Plotter.plot()` for sample code. 
""" - dot = build_pydot( - graph, steps, inputs, outputs, solution, executed, title, node_props, edge_props - ) - # Save plot # if filename: @@ -285,7 +288,7 @@ def plot_graph( if jupyter: from IPython.display import SVG - dot = SVG(data=dot.create_svg()) + return SVG(data=dot.create_svg()) ## Display graph via matplotlib # @@ -296,8 +299,10 @@ def plot_graph( png = dot.create_png() sio = io.BytesIO(png) img = mpimg.imread(sio) - plt.imshow(img, aspect="equal") if show != -1: + plt.imshow(img, aspect="equal") plt.show() + return img + return dot diff --git a/test/test_plot.py b/test/test_plot.py index 58eaed32..367036c5 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -109,7 +109,9 @@ def test_plot_matpotlib(pipeline, tmp_path): matplotlib.use("Agg") # do not open window in headless travis - assert pipeline.plot(show=-1) + img = pipeline.plot(show=-1) + assert img is not None + assert len(img) > 0 @pytest.mark.skipif(sys.version_info < (3, 5), reason="ipython-7+ dropped PY3.4-") From dabc7872257a2248b59987140bf4bc7a9c3c57aa Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 6 Oct 2019 19:43:21 +0300 Subject: [PATCH 078/167] ENH(plan.polt): CLUSTER pruned nodes --- graphkit/network.py | 1 + graphkit/plot.py | 79 +++++++++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 656244f0..5a70a01d 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -493,6 +493,7 @@ def _build_pydot(self, **kws): "outputs": self.outputs, "executed": self.executed, "edge_props": {e: {"color": "yellow"} for e in self.broken_edges}, + "clusters": {n: "pruned" for n in self.dag.nodes}, } mykws.update(kws) diff --git a/graphkit/plot.py b/graphkit/plot.py index f9ee24ae..009ad89e 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -48,6 +48,8 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): an optional nested dict of Grapvhiz attributes for 
certain nodes :param edge_props: an optional nested dict of Grapvhiz attributes for certain edges + :param clusters: + an optional mapping of nodes --> cluster-names, to group them :return: A :mod`pydot` instance @@ -58,24 +60,38 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): **Legend:** - NODES: - - - **circle**: function - - **oval**: subgraph function - - **house**: given input - - **inversed-house**: asked output - - **polygon**: given both as input & asked as output (what?) - - **square**: intermediate data, neither given nor asked. - - **red frame**: delete-instruction, to free up memory. - - **filled**: data node has a value in `solution`, shown in tooltip. - - **thick frame**: function/data node visited. - - ARROWS - - - **solid black arrows**: dependencies (source-data are``need``\ed - by target-operations, sources-operations ``provide`` target-data) - - **dashed black arrows**: optional needs - - **green-dotted arrows**: execution steps labeled in succession + *NODES:* + + circle + function + oval + subgraph function + house + given input + inversed-house + asked output + polygon + given both as input & asked as output (what?) + square + intermediate data, neither given nor asked. + red frame + delete-instruction, to free up memory. + filled + data node has a value in `solution` OR function has been executed. + thick frame + function/data node in `steps`. + + *ARROWS* + + solid black arrows + dependencies (source-data are``need``-ed by target-operations, + sources-operations ``provide`` target-data) + dashed black arrows + optional needs + green-dotted arrows + execution steps labeled in succession + yellow arrows + broken provides during pruning :return: An instance of the :mod`pydot` graph or whatever rendered @@ -134,10 +150,11 @@ def build_pydot( title=None, node_props=None, edge_props=None, + clusters=None, ): """ - Build a *Graphviz* graph/steps/inputs/outputs and return it. 
- + Build a *Graphviz* out of a Network graph/steps/inputs/outputs and return it. + See :meth:`Plotter.plot()` for the arguments, sample code, and the legend of the plots. """ @@ -148,6 +165,24 @@ def build_pydot( assert graph is not None + new_clusters = {} + + def append_or_cluster_node(dot, nx_node, node): + if not clusters or not nx_node in clusters: + dot.add_node(node) + else: + cluster_name = clusters[nx_node] + node_cluster = new_clusters.get(cluster_name) + if not node_cluster: + node_cluster = new_clusters[cluster_name] = pydot.Cluster( + cluster_name, label=cluster_name + ) + node_cluster.add_node(node) + + def append_any_clusters(dot): + for cluster in new_clusters.values(): + dot.add_subgraph(cluster) + def get_node_name(a): if isinstance(a, Operation): return a.name @@ -194,10 +229,12 @@ def get_node_name(a): _apply_user_props(node, node_props, key=node.get_name()) - dot.add_node(node) + append_or_cluster_node(dot, nx_node, node) _report_unmatched_user_props(node_props, "node") + append_any_clusters(dot) + # draw edges for src, dst in graph.edges: src_name = get_node_name(src) From c5b91676219266d3a6caab57ba24cacde98f27d9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 03:43:26 +0300 Subject: [PATCH 079/167] FIX(plot): VISUAL fixes & COLOR-palettes: + FIX: apply thickness as in legend also for operators. + enh: don't cluster if no nodes pruned. + enh: netop includes its name a graph-title. + color palette: wheat filled nodes. + reuse common func. 
--- graphkit/base.py | 1 + graphkit/network.py | 7 +++++-- graphkit/plot.py | 32 ++++++++++++++++++++------------ test/test_graphkit.py | 36 ++++++++++++++++++------------------ 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index 3e3381ed..3f17c4ea 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -162,6 +162,7 @@ def __init__(self, **kwargs): def _build_pydot(self, **kws): """delegate to network""" + kws.setdefault("title", self.name) return self.net._build_pydot(**kws) def _compute(self, named_inputs, outputs=None): diff --git a/graphkit/network.py b/graphkit/network.py index 5a70a01d..44da535b 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -486,14 +486,17 @@ def broken_dag(self): def _build_pydot(self, **kws): from .plot import build_pydot + clusters = None + if self.dag.nodes != self.net.graph.nodes: + clusters = {n: "after prunning" for n in self.dag.nodes} mykws = { "graph": self.net.graph, "steps": self.steps, "inputs": self.inputs, "outputs": self.outputs, "executed": self.executed, - "edge_props": {e: {"color": "yellow"} for e in self.broken_edges}, - "clusters": {n: "pruned" for n in self.dag.nodes}, + "edge_props": {e: {"color": "wheat", "penwidth": 2} for e in self.broken_edges}, + "clusters": clusters, } mykws.update(kws) diff --git a/graphkit/plot.py b/graphkit/plot.py index 009ad89e..b94bcd5a 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -90,7 +90,7 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): optional needs green-dotted arrows execution steps labeled in succession - yellow arrows + wheat arrows broken provides during pruning :return: @@ -117,6 +117,9 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): dot = self._build_pydot(**kws) return render_pydot(dot, filename=filename, show=show, jupyter=jupyter) + def _build_pydot(self, **kws): + raise AssertionError("Must implement that!") + def _is_class_value_in_list(lst, cls, value): 
return any(isinstance(i, cls) and i == value for i in lst) @@ -165,6 +168,9 @@ def build_pydot( assert graph is not None + steps_thickness = 3 + fill_color = "wheat" + steps_color = "#009999" new_clusters = {} def append_or_cluster_node(dot, nx_node, node): @@ -202,8 +208,8 @@ def get_node_name(a): _is_class_value_in_list(steps, PinInstruction, nx_node), ) # 0 is singled out because `nx_node` exists in `steps`. - color = "NOPE red blue purple".split()[choice] - kw = {"color": color, "penwidth": 2} + color = "NOPE #990000 blue purple".split()[choice] + kw = {"color": color, "penwidth": steps_thickness} # SHAPE change if with inputs/outputs. # tip: https://graphviz.gitlab.io/_pages/doc/info/shapes.html @@ -215,16 +221,18 @@ def get_node_name(a): # LABEL change with solution. if solution and nx_node in solution: kw["style"] = "filled" - kw["fillcolor"] = "gray" + kw["fillcolor"] = fill_color # kw["tooltip"] = str(solution.get(nx_node)) # not working :-() node = pydot.Node(name=nx_node, shape=shape, **kw) else: # Operation kw = {} + if steps and nx_node in steps: + kw["penwdth"] = steps_thickness shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" if executed and nx_node in executed: kw["style"] = "filled" - kw["fillcolor"] = "gray" + kw["fillcolor"] = fill_color node = pydot.Node(name=nx_node.name, shape=shape, **kw) _apply_user_props(node, node_props, key=node.get_name()) @@ -240,8 +248,8 @@ def get_node_name(a): src_name = get_node_name(src) dst_name = get_node_name(dst) kw = {} - if isinstance(dst, Operation) and any( - n == src and isinstance(n, optional) for n in dst.needs + if isinstance(dst, Operation) and _is_class_value_in_list( + dst.needs, optional, src ): kw["style"] = "dashed" edge = pydot.Edge(src=src_name, dst=dst_name, **kw) @@ -265,11 +273,11 @@ def get_node_name(a): dst=dst_name, label=str(i), style="dotted", - color="green", - fontcolor="green", + color=steps_color, + fontcolor=steps_color, fontname="bold", fontsize=18, - 
penwidth=3, + penwidth=steps_thickness, arrowhead="vee", ) dot.add_edge(edge) @@ -304,7 +312,7 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): :return: the matplotlib image if ``show=-1``, the SVG for Jupyter if ``jupyter=true``, or `dot`. - + See :meth:`Plotter.plot()` for sample code. """ # Save plot @@ -341,5 +349,5 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): plt.show() return img - + return dot diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 6c801488..0a99ad63 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -452,9 +452,9 @@ def test_optional_per_function_with_same_output(): ## ATTENTION, the selected function is NOT the one with more inputs # but the 1st satisfiable function added in the network. - add_op = operation(name='add', needs=['a', 'b'], provides='a+b')(add) + add_op = operation(name='add', needs=['a', 'b'], provides='a+-b')(add) sub_op_optional = operation( - name='sub_opt', needs=['a', modifiers.optional('b')], provides='a+b' + name='sub_opt', needs=['a', modifiers.optional('b')], provides='a+-b' )(lambda a, b=10: a - b) # Normal order @@ -462,24 +462,24 @@ def test_optional_per_function_with_same_output(): pipeline = compose(name='partial_optionals')(add_op, sub_op_optional) # named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+b': 3, 'b': 2} - assert pipeline(named_inputs, ['a+b']) == {'a+b': 3} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': 3, 'b': 2} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': 3} # named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} - assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} # Inverse op order # pipeline = compose(name='partial_optionals')(sub_op_optional, add_op) # named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+b': -1, 'b': 2} 
- assert pipeline(named_inputs, ['a+b']) == {'a+b': -1} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': -1, 'b': 2} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -1} # named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} - assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} # PARALLEL + Normal order # @@ -487,12 +487,12 @@ def test_optional_per_function_with_same_output(): pipeline.set_execution_method("parallel") # named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+b': 3, 'b': 2} - assert pipeline(named_inputs, ['a+b']) == {'a+b': 3} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': 3, 'b': 2} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': 3} # named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} - assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} # PARALLEL + Inverse op order # @@ -500,12 +500,12 @@ def test_optional_per_function_with_same_output(): pipeline.set_execution_method("parallel") # named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+b': -1, 'b': 2} - assert pipeline(named_inputs, ['a+b']) == {'a+b': -1} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': -1, 'b': 2} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -1} # named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+b': -9} - assert pipeline(named_inputs, ['a+b']) == {'a+b': -9} + assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} + assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} def test_deleted_optional(): From 1d443ce966a980153b5ea6b059242e8e438e7450 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 04:07:21 +0300 Subject: [PATCH 080/167] ENH(netop): mark all its needs as 
OPTIONAL --- graphkit/functional.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index b7e4bd57..84f92f84 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -198,7 +198,9 @@ def __call__(self, *operations): operations = merge_set provides = iset(chain(*[op.provides for op in operations])) - needs = iset(chain(*[op.needs for op in operations])) - provides + # Mark them all as optional, now that #18 calmly ignores + # non-fully satisfied operations. + needs = iset(chain(*[optional(n) for op in operations for n in op.needs ])) - provides # Build network net = Network() From 77bec49425268455513bc6a66aa0138e4c3e2859 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 06:02:09 +0300 Subject: [PATCH 081/167] FIX(NET): were FORGETTING PRUNED ASKED-OUTPUTs... ... bugged in the opening commit d403783 of this PR, and discovered 68(!) commits later, and all that time had to live with x4 broken TCs with asked-outputs. --- graphkit/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/network.py b/graphkit/network.py index 44da535b..696c7687 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -343,7 +343,7 @@ def _prune_graph(self, outputs, inputs): # Prune unsatisfied operations (those with partial inputs or no outputs). unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) # Clone it so that it is picklable. 
- pruned_dag = dag.subgraph(self.graph.nodes - unsatisfied).copy() + pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied).copy() return pruned_dag, tuple(broken_edges) From 89e4edbfd696c138ed1a3b9a79ee0f53a111c259 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 13:06:19 +0300 Subject: [PATCH 082/167] ENH(plot,TC): pipeline delegates to last_plan, if exists --- graphkit/base.py | 3 ++- test/test_plot.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/graphkit/base.py b/graphkit/base.py index 3f17c4ea..5bf35d7e 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -163,7 +163,8 @@ def __init__(self, **kwargs): def _build_pydot(self, **kws): """delegate to network""" kws.setdefault("title", self.name) - return self.net._build_pydot(**kws) + plotter = self.net.last_plan or self.net + return plotter._build_pydot(**kws) def _compute(self, named_inputs, outputs=None): return self.net.compute( diff --git a/test/test_plot.py b/test/test_plot.py index 367036c5..d28ffa80 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -80,6 +80,42 @@ def test_plot_formats(pipeline, input_names, inputs, outputs, tmp_path): prev_dot2 = dot2 +def test_plotters_hierarchy(pipeline, inputs, outputs): + # Plotting original network, no plan. + base_dot = str(pipeline.plot(inputs=inputs, outputs=outputs)) + assert base_dot + assert pipeline.name in str(base_dot) + + solution = pipeline(inputs, outputs) + + # Plotting delegates to netwrok plan. + plan_dot = str(pipeline.plot(inputs=inputs, outputs=outputs)) + assert plan_dot + assert plan_dot != base_dot + assert pipeline.name in str(plan_dot) + + # Plot a plan + solution, which must be different from all before. 
+ sol_plan_dot = str(pipeline.plot(inputs=inputs, outputs=outputs, solution=solution)) + assert sol_plan_dot != base_dot + assert sol_plan_dot != plan_dot + assert pipeline.name in str(plan_dot) + + plan = pipeline.net.last_plan + pipeline.net.last_plan = None + + # We resetted last_plan to check if it reproduces original. + base_dot2 = str(pipeline.plot(inputs=inputs, outputs=outputs)) + assert str(base_dot2) == str(base_dot) + + # Calling plot directly on plan misses netop.name + raw_plan_dot = str(plan.plot(inputs=inputs, outputs=outputs)) + assert pipeline.name not in str(raw_plan_dot) + + # Chek plan does not contain solution, unless given. + raw_sol_plan_dot = str(plan.plot(inputs=inputs, outputs=outputs, solution=solution)) + assert raw_sol_plan_dot != raw_plan_dot + + def test_plot_bad_format(pipeline, tmp_path): with pytest.raises(ValueError, match="Unknown file format") as exinfo: pipeline.plot(filename="bad.format") From cef75266da85c9b9f308469a49a7704fae60d829 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 13:45:22 +0300 Subject: [PATCH 083/167] FIX(plot.TC): formats-TC were testing dot-file, not rendered... + ENH: strongly refatctored TC to detect nulls & dupes. --- test/test_plot.py | 57 ++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/test/test_plot.py b/test/test_plot.py index d28ffa80..cf2fc3a8 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -52,32 +52,49 @@ def test_plotting_docstring(): assert ext in network.Network.plot.__doc__ -def test_plot_formats(pipeline, input_names, inputs, outputs, tmp_path): +def test_plot_formats(pipeline, tmp_path): ## Generate all formats (not needing to save files) # run it here (and not in ficture) to ansure `last_plan` exists. + inputs = {"a": 1, "b1": 2} + outputs = ["asked", "b1"] solution = pipeline(inputs, outputs) - # ...these are not working on my PC, or travis. 
- forbidden_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() - prev_dot1 = prev_dot2 = None - for ext in plot.supported_plot_formats(): - if ext not in forbidden_formats: - # Check Network. - # - dot1 = pipeline.plot(inputs=input_names, outputs=outputs, solution=solution) - assert dot1 - assert ext == ".jpg" or dot1 != prev_dot1 - prev_dot1 = dot1 - - # Check ExecutionPlan. - # - dot2 = pipeline.net.last_plan.plot( - inputs=input_names, outputs=outputs, solution=solution + # The 1st list does not working on my PC, or travis. + # NOTE: maintain the other lists manually from the Exception message. + failing_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() + # The subsequent format names producing the same dot-file. + dupe_formats = [ + ".cmapx_np", # .cmapx + ".imap_np", # .imap + ".jpeg", # .jpe + ".jpg", # .jpe + ".plain-ext", # .plain + ] + null_formats = ".cmap .ismap".split() + forbidden_formats = set(failing_formats + dupe_formats + null_formats) + formats_to_check = sorted(set(plot.supported_plot_formats()) - forbidden_formats) + + # Collect old dots to detect dupes. + prev_renders = {} + dupe_errs = [] + for ext in formats_to_check: + # Check Network. 
+ # + render = pipeline.plot(solution=solution).create(format=ext[1:]) + if not render: + dupe_errs.append("\n null: %s" % ext) + + elif render in prev_renders.values(): + dupe_errs.append( + "\n dupe: %s <--> %s" + % (ext, [pext for pext, pdot in prev_renders.items() if pdot == render]) ) - assert dot2 - assert ext == ".jpg" or dot2 != prev_dot2 - prev_dot2 = dot2 + else: + prev_renders[ext] = render + + if dupe_errs: + raise AssertionError("Failed pydot formats: %s" % "".join(sorted(dupe_errs))) def test_plotters_hierarchy(pipeline, inputs, outputs): From f9b241506f3d18e2a8a285286e5474bf89822af9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 20:40:25 +0300 Subject: [PATCH 084/167] enh(net): prune also isolate data --- graphkit/network.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/graphkit/network.py b/graphkit/network.py index 696c7687..2fdc5264 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -331,6 +331,9 @@ def _prune_graph(self, outputs, inputs): broken_edges.update(broken_dag.in_edges(given)) broken_dag.remove_edges_from(broken_edges) + # Drop stray input values and operations (if any). + broken_dag.remove_nodes_from(nx.isolates(broken_dag)) + if outputs: # If caller requested specific outputs, we can prune any # unrelated nodes further up the dag. 
From fce05154256c85a9dd5655d654936c1dec5525f9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 21:03:26 +0300 Subject: [PATCH 085/167] FIX( Date: Mon, 7 Oct 2019 22:41:35 +0300 Subject: [PATCH 086/167] ENH(plot): switch oval<-->circle ops/netops --- graphkit/plot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index b94bcd5a..f28c2bf4 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -62,9 +62,9 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): *NODES:* - circle - function oval + function + circle subgraph function house given input @@ -229,7 +229,7 @@ def get_node_name(a): if steps and nx_node in steps: kw["penwdth"] = steps_thickness - shape = "oval" if isinstance(nx_node, NetworkOperation) else "circle" + shape = "oval" if isinstance(nx_node, NetworkOperation) else "oval" if executed and nx_node in executed: kw["style"] = "filled" kw["fillcolor"] = fill_color From e8fec2250ac293a93a2746f15f8314ce05a926b4 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 22:42:03 +0300 Subject: [PATCH 087/167] feat(plot): +legend & TC --- graphkit/plot.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++ test/test_plot.py | 21 +++++++++++++++--- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index f28c2bf4..b99a425b 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -331,6 +331,8 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): ## Return an SVG renderable in jupyter. 
# if jupyter: + # TODO: Alternatively use Plotly https://plot.ly/python/network-graphs/ + # or this https://plot.ly/~empet/14007.embed from IPython.display import SVG return SVG(data=dot.create_svg()) @@ -351,3 +353,55 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): return img return dot + + +def legend(filename=None, show=None, jupyter=None): + """Generate a legend for all plots (see Plotter.plot() for args)""" + import pydot + + ## From https://stackoverflow.com/questions/3499056/making-a-legend-key-in-graphviz + dot_text = """ + digraph { + rankdir=LR; + subgraph cluster_legend { + label="Graphkit Legend"; + + operation [shape=oval]; + pipeline [shape=circle]; + insteps [penwidth=3 label="in steps"]; + executed [style=filled fillcolor=wheat]; + operation -> pipeline -> insteps -> executed [style=invis]; + + data [shape=rect]; + input [shape=invhouse]; + output [shape=house]; + inp_out [shape=hexagon label="inp+out"]; + evicted [shape=rect penwidth=3 color="#990000"]; + pinned [shape=rect penwidth=3 color="purple"]; + evpin [shape=rect penwidth=3 color=purple label="evict+pin"]; + sol [shape=rect style=filled fillcolor=wheat label="in solution"]; + data -> input -> output -> inp_out -> evicted -> pinned -> evpin -> sol [style=invis]; + + a1 [style=invis] b1 [color=invis label="dependency"]; + a1 -> b1; + a2 [style=invis] b2 [color=invis label="optional"]; + a2 -> b2 [style=dashed]; + a3 [style=invis] b3 [color=invis penwidth=3 label="broken dependency"]; + a3 -> b3 [color=wheat penwidth=2]; + a4 [style=invis] b4 [color=invis penwidth=4 label="steps sequence"]; + a4 -> b4 [color="#009999" penwidth=4 style=dotted arrowhead=vee]; + b1 -> a2 [style=invis]; + b2 -> a3 [style=invis]; + b3 -> a4 [style=invis]; + } + } + """ + + dot = pydot.graph_from_dot_data(dot_text)[0] + # clus = pydot.Cluster("Graphkit legend", label="Graphkit legend") + # dot.add_subgraph(clus) + + # nodes = dot.Node() + # clus.add_node("operation") + + return render_pydot(dot, 
filename=filename, show=show, jupyter=jupyter) diff --git a/test/test_plot.py b/test/test_plot.py index cf2fc3a8..c3cc1e31 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -150,6 +150,10 @@ def test_plot_write_file(pipeline, tmp_path): assert fpath.exists() assert dot1 +def _check_plt_img(img): + assert img is not None + assert len(img) > 0 + def test_plot_matpotlib(pipeline, tmp_path): ## Try matplotlib Window, but # without opening a Window. @@ -163,9 +167,7 @@ def test_plot_matpotlib(pipeline, tmp_path): matplotlib.use("Agg") # do not open window in headless travis img = pipeline.plot(show=-1) - assert img is not None - assert len(img) > 0 - + _check_plt_img(img) @pytest.mark.skipif(sys.version_info < (3, 5), reason="ipython-7+ dropped PY3.4-") def test_plot_jupyter(pipeline, tmp_path): @@ -173,3 +175,16 @@ def test_plot_jupyter(pipeline, tmp_path): dot = pipeline.plot(jupyter=True) assert "display.SVG" in str(type(dot)) + +@pytest.mark.skipif(sys.version_info < (3, 5), reason="ipython-7+ dropped PY3.4-") +def test_plot_legend(pipeline, tmp_path): + ## Try returned Jupyter SVG. + + dot = plot.legend() + assert dot + + img = plot.legend(show=-1) + _check_plt_img(img) + + dot = plot.legend(jupyter=True) + assert "display.SVG" in str(type(dot)) From 53c6ce5a3666b08f55c3edf51eb57d32e2078ae4 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 7 Oct 2019 22:56:35 +0300 Subject: [PATCH 088/167] DOC(PLOT): +sample GRAPH & LEGEND into README... + test: some refacts on vars & for the uploaded images. 
+ MARK SPURIOUS FAILS in < PY3.6 due to unordered dicts eg https://travis-ci.org/ankostis/graphkit/jobs/594813119 --- README.md | 8 +++- docs/source/images/GraphkitLegend.png | Bin 0 -> 39077 bytes ...not_overrides_given_intermediate-asked.png | Bin 0 -> 26293 bytes test/test_graphkit.py | 39 ++++++++++-------- 4 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 docs/source/images/GraphkitLegend.png create mode 100644 docs/source/images/test_pruning_not_overrides_given_intermediate-asked.png diff --git a/README.md b/README.md index af414020..cf4536de 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ > It's a DAG all the way down +![Sample graph](docs/source/images/test_pruning_not_overrides_given_intermediate-asked.png "Sample graph") + ## Lightweight computation graphs for Python GraphKit is a lightweight Python module for creating and running ordered graphs of computations, where the nodes of the graph correspond to computational operations, and the edges correspond to output --> input dependencies between those operations. Such graphs are useful in computer vision, machine learning, and many other domains. @@ -54,9 +56,11 @@ As you can see, any function can be used as an operation in GraphKit, even ones For debugging, you may plot the workflow with one of these methods: ```python - graph.net.plot(show=True) # open a matplotlib window - graph.net.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg + graph.plot(show=True) # open a matplotlib window + graph.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg ``` +This is the legend of the diagrams: +![Graphkit Legend](docs/source/images/GraphkitLegend.png "Graphkit Legend") > **NOTE**: For plots, `graphviz` must be in your PATH, and `pydot` & `matplotlib` python packages installed. 
> You may install both when installing *graphkit* with its `plot` extras: diff --git a/docs/source/images/GraphkitLegend.png b/docs/source/images/GraphkitLegend.png new file mode 100644 index 0000000000000000000000000000000000000000..3b4d273bdb697fb3de02ce4d6bef27fe1c1bab9e GIT binary patch literal 39077 zcmce;c|6wL8Z~~a%tKKmV}_KWP{~YYX*6X>#)OCzC1sv7MM$Mgsmvs$!8|1m6w+j@ zND5^N@mpI@=RD{AiqBTSScJ)5#fKo24(*J9UXA&yB%39v=@9(H6A5{iEs?JX@y(no_a7*G}ufIk+ znkz&=BlPT#U#T*+(?32n)3LD3r-=N0xbW~>OL!DdGEIH|Fq^kc#W_@(;xXLG>g(&9 z`i|+p@8?5fXV?G2HOTp2pZWj&QyOhNWN+_ttkP6M+tG2ukDouqMfE2VFI^J#SVlf} zPR5@F&Y05Mv7{ zI(4?I)Oqz~8RyWuN8br<-1x1$kyqs`XJ}|>*{P8(9sALrKQ$U-cm>!Z*0M!N`OJ6( zhlctZaT`}RM02;6pE;F$=@LC}2w9Ju4SzQ3mXVs8nv}G3_5API;?qA5D|wI8EzJE~ zJ~`68oR^ok?BPQp85!dtdF>dUh>Q%uRU8}@Q^~EFI(D5!HuiV3GBN^ZCcbLy*s;TQ z_UFJ0Rj;nm-r8c@dIpMrR@S=x1s3!>cQzltUTAJ%VX@D`A_^~&p1A^RtoOh7GC|&(0(>hj2v8Li z6qw#VL~n8MVDHC|!5=>!qEvl&c&hjN_n4%O2Lv~5Vj-XT#to6Y{Crj(p1D1#V`F3M z8t=Gu&j&fP(k@%JegA%rn3xzjkHHlNhK9xV%^c(3KX8XF=cl3YD0t8zhBXWf*zoej zXMg>8ar%p1c)hJlXAza*ja>oJ(abaBUxFS#){U^kLb1ro%0}@ib9Hodbe6ei*4^gj zKO(es>#A;_Dc;N5j-DGHb}1_>+kW5xH-2JdRpsyBzk5G_4!wV3pbx9G@x(9Y#fbgz zPfDOa_5Hn?ySp@(%#meUTJ^`TuH@rmadUHfU+Kr6yFb6C$fhi*^By1Isjj05*v*X{ID@|jtM-DsTq z^|RRd^(Gt(n;##W20M$%xidS={oug^I%Z}q509;%pI?nc?DV!}8`3c{20eUuW=G7y zhbL{`+_pC@J;II%Ek4$({{H@no9fquNDgV=^*3r}r$&}iut-Kj^XJd6*mZd;y@G;5 zOKWRDWF+I;Qs=Nu2Op%}x<&W=`SW9*p7n|1CTWF*%j0x|w;g?(kmBa*8dFpxMX9=N z-`vyFLs_$C4b~(>Czg+M^C7y9LaW%66dvq%`bQ;sdHK1?cWyYu0U;sOh$nZ%^{F51 zt5@4~Jvu%3{{9w3$H38CiRdU0mwquqmamgPCue3uj+{KkJGW z6pCrdVdkweGOcCqc9beC4;97L)ph#k3)N;94HJ{qyb2x^iiG7O&W#7|2IGV)cnt3Q zarxuNkJisqRcN zX5m$^eqC_T>(`HU9R&v&wY0PzH#ckS*~9YeitI%>6)C9|l=Bxa(jxZ-2M2GL;J7$k zXn2zaSyez%((3ln(VpskMn;-mUS3UiWxrLn-f=Rts5txU>RH2^dqN5dHqo%~Oh2!< z_u^vG&WoGU)6ys;m3Dsb>sw-PZ%-*q8ugkD4G<9#X?gjwro`dJlMkVJc@mVSfkW2T z{y1h!D4Q(G{0Cm7*4EUhXGH&a&b1z=AmeSspF$OHZEL%P;x#_VdhXmgJ%$TzD%veaw{w&3Si6*KW+@f;({-+idx-9sGIR##J@KGX*+rEMuc 
zGj)96aZ^)3cDB>~Sik;(RKL*dY*A7=@7%c)tFmxHQ(K#)I%#R?@Q8@h4t}<_g8nAX z>(87%ZS%g&ogC)F&n}1H_+(;XDVFTz%||jN+!*TjUQ_zjb`$3nd3VL%ygTvrHQ7?s z2A#u)Kd-;Hl8r5J<@!Bo2;_C|7#J9Yldf-ghQEzVP7b%J4fXLz)zi}(9UH60pRMbV z^5sY1W6KjyTbNy{x0M#zdoAkZx3_8%9CqK|-)Ec6%*Y_Agi>fpvfcX!o{T$_B&s{| z4@i5B_Qcz5l#qDhk|~jY;2sTS_`^dJ+cWO&?oGQF$g+jcNA5HrrKnZW{GMy{@7XE4 z;m%^m*9E+f%FA1`4R6Mq)u*SY>+af>kW7`5m>7ahy`Gh&rlGO4skeQmB+m7|sC(F9=p+tn9{`r~8%Q&1y*x}32*RP~XzAm)F zB9e-se{;{(Q5H* zPHe;Ymscc#j1RwAiDeB73!^>x{e8sO*M&_jMCZJX^rfQ zgd1|@3U^dg)acj3O7$H(=&_XnKn4CLvUh$?PBw(I3iaUhDl03yytz$H>FMte>gllt zBI)y6nC0KRd2`XxciZrgBsk{hesxy(oY^7Z*%ZGnJSr+UKcC<+X}|fiD0SAaZtONK z_m-hpHz#eBJN-kLV*MmR6bPdV5Fjog!RQE8dv`ZKstDC+j;X7YQv^^+#@leX$9lxfVOH;dseJk6->&+lSJ{(Vmn%-Kq^X7Qc~HOsZH9ke9Tx(kHHqI z-B-4S+`lj1*Vp&Nr5`UqM@L7Y%uV;lBFKVcW0wQ}oR5nOsE=HoiNb&+)KKQ$U*t7r zivrb)N8^;TrikoG3I7LjiP?pbEy$6KE6a2Ks$xAo+DjR&F8xOSFkGdsy`5e0)OSr+ z*Mb&(vFOT5RUEgLToXxQ5fMS`BhELeXr>YkFH{#KJTw<;d}vg3w6smlGT=VHBP-J_ z;seQR-C_#~39&)KS|@wwRZEM1aj`Td4aq9s`;nC6cFSB;`qJ(YPvISc}mWV7lLR{pZH?1!Vl_sT={xH^-{2fwi4**D7+S+uX z9Uri~qy4d}w%^{CI(Anm54|porCLg}76-y_{?~dpbsds%0{giBv>HpAklSwGKAxX! 
zwwwIL%SM?${Yj4Wb9`{5@SM;;=bpSJzW)F2U#$Nu6j`kBaLa#|?az0V|7pkyi~ld} z*~eg(?V2@D9zWi0Zq9YPq$Car3hCg`;lmj~MEmsh1J0de02Gedo2%&yIjZRJ<}^eGe4XzgmNvjHdx zHSgYWA3l6oU1v5;!E?y^?H#A^bLRptUR?b=RdqSyh`bj{xq71z$usy`WEgU2_*8wF zupUf2aw~v>YEV6jkX$*rxfyxCWdaY{0df$SkCIN1;2oDYY_pT!Rw{7)i2+vD0BXAo zv3#UUG-8hJIqZT$Ladyebokk=E}i=Z9aBk+p&2XlTTmJP_D%Xo+Xj-lWBHWX{_`UG z`Jfo|*@2Y8EX%!nZ>IXz1X44u;NoIH*rnSUZ#a7Ns7u$QHE5Mdp?`StTan*_Z@S&z zXJ|J^BpRy)1O$)-h4g;x*AM%%3%{rK?c29PA*sz{sEuAgK!8)#S1EKke~f#7gMh58 z$)M@qgH+0bjWX6n(7gZfq4nV@x6U_jdeI^{zPb_JQY=Q02U_U}K(`u1pViqh>Ws|H zkQl9he%~k;dBaEx;DXJrA*y<$av_G9dt24(*Q<{lIilW};~}ISRRtiXZ)8N^AzDA9 zLEZnnHw}63?I%xePP=y~UX=^j` z80q2#`YAj6YcuLjNQ_a|h7B9YbCbYAy#s&=0w|>Cjr`|bZ#CFfC+n`gfB!zG+_AMU zU%hf4>(k=IKJjMj#mB`}BWZ#7=oubn+PHCJ=i9do|NTvE@|zSi8OKhZY(S23pPxI; z5!r0ho@1=LcW)Rdm*>}%BY`dHdH>Q`w;Fm;RF;tL3^NG2#bp|Vsp0x z4obOoDW&%tIcsZcciwSYNjgOIsr0;y5CO|~s{w`QR{YtCc9X}CA6I?<@Q@^PkCY>B zgDvUPpRc${**{sOsipO=-_Rz%!NwzZjC2#7_fM?n+UmI0BZbTE*N;z*9eHcszkeSb zqy9Gtg%E_o_Q=}F%4fC#rO+uVDyH|hZ9p+64}`#L%o6<@Jz|6zG`Ne_89A-0wpL?s zgG_v4B0FdzDd!Fe;Ou`rhxvt-3Kad_CxJmpk^0N9~*{P<>6Gd*8>2L~Z#W#0PwdWt_VIG`uRKQxpEdx(FwZrz%emq)_I z@yL;+xg(uL0^{T3YlUh@cKs6u^O4m3u?ZJ1hT(VdTmp)U+-7EG2viDX)v8r%#l>?! 
znxdoJASy~l!G}pJC}3H)ZXKz)D05bba30ZC-7+cUBcHdR!1BwR zKc7wor@zqt@L|Etn@RFl+qH}7nyOz+XQwI160lSSA5|kxpFYjraT6~OIFX*69f4{@ z>W#OzHxRS`wQGFjkz8D`hWQL%FNnQ}=9WFyS4YpI>YK7hq2j_%G%o6rSI+@ zU8%s5CH+NuFQ1((s4)rcE4sQ%*;0E>p_Fo*3~To_+Wp@znHyS8`&bdEgEO(k(gO1;7@alw!cieeEHlJSvNs& zWcVR;l2m+3UjAqt;_Yw*DCj^?TUN}>%s3)kz?P(AMK>#&9z4j4gq!tT&^PnWn?|?8 z>&h=>rChrflaR2guCcKn#XLMVRtHNA3Ix=}G76{%^+=A5pb`Rc=F>dCb~wJfyAc$D z_*i0Qtc=&FH6Rg>Nr45cf?j^_w{OfQCMGy;X=vM!g(i+4KfZ#KGaNOu@9Cv2I9gQ> zbB|dQfX*MB8eyTOr3GiqNJ@ZC#$H0cA&2*(j^YQm*Zn@@wPH=g+}uS9MW$Urs-MBz0J@>i`RbLt(*-SmU}oT0Ohd?PtH;NGcs%s z9sZOse(Bn!po@}>=ab?>#-m)W->95XUbk_jWc$+mBGEv*c&@6hF21iF`70InH~EK^lg z<&i(W42eM)KAJ+5!EyE7|;fq`*M_%k)1X8og?i48QY`o(3df35nm z#tal$*y-G)q9JQ$lq1x<45wE&0uPf;2dz#Bb^ih(*a8Csj}eTne1-!+XERNz-md-Et#V2rmUlxG4@53=c<4J>8SPKea=gyrTUpttUy~q6lsU526>)yOs zV`F0jngfL}4l9_5BX8RfwFY#UeyMM&$$AuZf{ss)bVY;gNw6@}=fjyJ@QhF``}57y z(JYgu0F0Gz`-?&_&?MKz@~ditbFHbZW#!|GLG!v+S-JfEx5rPO)O2)k5NvZ^Kw}>O zdba(I<>DJQYzMqrvUDk{*UT#Q0!s~jeb!x9WYrHu9(Hu@;`wGStK(d9V_iu=E+aty zt@SV5h2{iYL$&%of72;r<%<|jvEY>19DMMFWl>3qMyiTW*;xjX6ZAJW21@(m(59&P za8XfF$sPYl2dDwsBuuhmdP%wWgnFT6`H}kJD|L06%V=r)L7K>U4y|(SdPEP1OXv)A z;^;usvBtqMX5z?h8dg^P=m2Vv3`1fJzkVDTsI97^lpKCm^XT-?;26V9Egc=ZXP0F< zZ`<#@e=ZLv8y%Nt(}SvQ+qPkyb|lUkd;$h3^PUg`Z5)i0NRGoNvB2D%99ux-idGzbXKq{NAHXSdWL2ZUpOr9jW^6#UlRMT!Ar01vf#+&~(}X%t zQwx%^uA*#3%ozMM(u#N7h@pcY`9{L&rg!0<*wP!Lp6xhA`j z?eV>C-^f=t_x1yZsOx0Mp$}cMckf;txa)`VC#IYLF2HewL6gcv(%PrbIvKyPH}1-n zNOX98c*SWHlra?k<$RKC5iR%JtO(d11Q1=9m8X0zG7=p?I7mOztDQdmYp}Co9nEBM zbyXK{>-D&T7FO!Gs~5rySG)E+9<%0AJ;9v)<&tA}!A6l?bE*&{v~9m|ETXxb`$rB| zPb0O0N>XIuks~DC_((UukWNf+dAagM%V~dz9OO(Pm(}87@q~qElWkjH zf@qkfz%*gOX*f7I5>xicfr4l(IQWpzcy)Dk3EokbnI;q-tDzJR9lM@QJ~O+hS{o z1VT8?r`MFZ5$ozYc^#0ktWgt4ib+BmNg+`aliZ!5c7-u!8?$a;P?vmI0#JQdrkV(QC!7HBW8X{)b`SZQhXxSe>HoAXre||sIP2@p~N?(L`I{-Hu zIk~Jt+6gmS2GH-wQRJ3&t$Oy1`N@+fORncB03?&HFh%J!8&Ke;35FnWKTngTm!3R% zGCVSJj zS1Kq|9!0lnkYIvi{y+(1-Nc+D2OoIQgN04rY|lWNTaYRe2ktTfx2fyowhndV6J<{T 
z$d@!-Bph5^qL8)ddGpP8?A_Z40_48RA$FU?hiQQLmeJ6VFuo5-CoeDW7!Jaz$x^3R zG=2-eWj9$qdVFboq!s&}qTtDJb8l*BY3YRG?2A;@SRnfTnh?e%*K^;3`?h}nNS@_; zcVxS&U-a4q)t=6KTMix4#N+5+Uh~D~nPm6c=@H`=qYG-_Cq_SA6v9FXdu;uajh5tS zR?ErCu0}7R4KQQl=opUewIj4>OIkon>k)Q*&5hPB2Z$ECpWXu{9(+DydZsZW_5nn% zHn=o2Y+j|VkPtRPuoLn~IATB2RX+H(RQ#IKX~E)haEGQ9XXSAUX@DOCUK&ecQ%HYK z@;3-Hz2V5%*s!ZtdC~q#?0b;24ode@T9&E=(F@YO&DYA<%c%N8rw*r^K6x6&UF|n3 z$n*Zeii?uhfVCo+y?n_AMYbNb%pKSP(XBpcT6!7T1t&Zh z(4hy!0_63^rVHysVvIjs1;>cI9(wog-4^{p85}(#N~o$@A@pkS&{Bt=a(?{uX@|>5 zM?N=Tg!Gw7moDM2LQRh!_drx=K;g1+bCWDCEGk+7UDNcQ>(%XFaze49H8t;f?zw%a zD*n(s`g3Y!y1C8{Jc(X6-+nsz2Mo#zrFDeN099B}x=1Sz9mpVA{y2wUj?)Ox;=^e+oI<5gUw%uoc?#|!HHS(r}c5>3Ewf*fX zlb8ZchukD}ot)C4oNVi6{*t1J7jHU?e0f7zcxz_YEOMpo27&BR_l~{`1#Mo{+R6&H zAP`*DKK;R^ZEbBNBTJayp@VE%j|3eYW3-MPnhGj2s6sXchVm=`%9ge^O;gk47X6$% zILtk{g%$A|IGUTAYeCjP!ym}&V?aX|6vDW=4`H{@VCED}K9mJ8ZG;-`t)aIDxFM-n zL|(D#O#4Dwj=@8>6jS3U&Ng1ZZx-hm2VSJ#yt;AomMtMMi^)neao734G4Lh5Lqq4l zXz_J*b)jckB}kKaL035N;X_4GH6iTc`>i4H;Nu@hTQG(y1|Uzz8#12>jud^)3I*-N zL^N}~wP8CYG@u*DDxaCOZo8$wL?rh<1mbYJ#u$)QrH2mK*a#4d3mRXl3&z*bf7_vU znf|C8GyOcz{#tB)m)V>9>~Ae^P9CNk|EW2i8OAAnR*r#@QT&e2pVRqPtlau+Zw)o6 zXYgRy^p2I*hApo}qYRFArNR^1XqX`o3ynwy#LWS&M%0ZVzM-KZ(EfaboS1|}9agUg z{*eC0*qx=4PS9s;s;8G$%zjsdDM#1#%DsECp-gKG1iqQvEH=1AT0Hb9z{W`~J9Txd zfT+Z+bDqFU5(R$I_Q^C=vb@u+7WbUejSpkeM;@Pce0N4K4@#wD$lV8Hoz@tr@;ncC+LB810{6?qTM0L-ZXGXGQKJ8eB{VEWO6~- zd$y8q3g|z-NR@QY2Jba6_wNv8YqE@acW0GZj6z#`3K!#py zBT*Q|bKi-rLz)28JUj4XWJnttIu2ePDjZ6WGF3X%b$9Gg>#pztOd|#=VPWAaXqrS7&dn81NlAgI&eYSM)$l)FsN*2PYyn^gVG?8E zSF!j$z{8vgjPpGd{GBPsRcMe{Y zKJZp9C%oA5^2-^q5wE$<3bC3il^35r-IYCQ_+i16DSSA@M4X47CkdF zL0!FHzR&|55wq6zFRA%eVT}h)ck_>YEfIXv*;xhLZTk-3UF;EmI-eAr#N$Ff@X=j$HY7vGZ09s3<5o%Q+9 zE%rKg(<}O%)Ge3%Q@Jg(*rR;To+aL)>=%{?ifyUqr~8-Bp(!E2f$02zQ0Z>n6)Pg^ ziH`%I0ZyHz5>^#FTMj?1LZxQ~6$MK|QeFV0J9<#3c~+Ht7dIVL2W^6%4uvfuI4rCd z@kJVf9OGOmmrkj{p&>z4RnmHC!UDk|VM>8_29`090&6J#(sm7L*xr_oj!;NWpPya| znx6K8DT@}RP8~hN{nRixg48uM$a~`q*>>cak(M1Ao!GbIpCeYTC&G%8lT+eLE8`@X 
zG)n{BPdB(Y_1_X~@OIMsn)|4K%x*S^PJ;8|ceI~uv0=>I`+I%PJ~tJ8cG1e0e)a0r zPp>MjJ~KHidS9vH&Wrs$wf*^zMq;NX%v$GXXBN;r7<|#ueR%4ddgV3VG_$=5sfliL z#bcCpMdn_knsU*PUzi*Frh9>M8b&R8PSS?;Ui@zq)I`)M|hJ6lHuMVY@U-?W+h zXmkGJ1@RJQN~g~t(@B1XAXSdN4Q{Frs?%@ER9vd08Q2tNQ&;6$W({MTG6$%^opZgNmlc3;^L<1?pY#llzj5NBw-PiJwR{+27G}VRmdpC*ha&~gE zf#rf2OwRiFAfIf8F0vgaBCy<8FIv>CKnPGm$Y|W#T((1b-k)cxznR+{WI}Byv&Px8 zN?;DG-P~gTZ>G3In_;vxHSIjJ<&T$bEOT*N;X#9M?U9!>1X$Qjcak#=t4v|b>j@vy ziy#qMlzW#N%;Y`+^LBl|)TS578;|lB*O^N0cJfJdQ1aEYVh^D1f2r&{XWHlf;+nGf zqu9N?;#s0#n9!^=NuzOrs4;maR?_)V;=u2*Vjtfa2!%V8(B&uTsj4$DJ`9n}dzSUK z@n`U=yf;I!`g;lwyEH#}Qk8ABT3Gl--{6-OlWgcJHP*9|tUJ6H?B#9E0i@-DD{9<$7UywLKL?~IvG)}Q0mXmn`&&F{3Q==ABF=xUrQ7YbqS80 zc}Fd-mO~4rmt?cKRVdW%8|e04L(PV@!qZBq*r(>APg>sii1)A%&B@xk%8k9n-pkeJ z&3@}2wRqsY8ib0YOJ~}l)cxfTI!k@88I@Q)@TMJ;G_ZCR{AFoubSuTPk)8dczLm~O z$R*kTS#cZ>-xKtOV112dPyb9!PFU46F$WhAj9R>P_4SD-x%^E+7=3(vRsmWAj1Ysy z#fz_X96*295XBudmkmVoqT=E>)8&S5Uu51E6&K&>DIc(y_RgK-zoxvuwTrJL)w^pT zY9iM7l27?=oG&<$4G!p~J%AD9yq5)-(SZ1bZqUs=fNDsP#3>AJDkwkyXm+FTkk{Gj@Z}-;KI_%TmU-#C&JL&Xr zXS%Zc{Bh1lQ&;N>87-PWgfj9P6Ck8N(Lp5GxoV1QXT6_HbdBM1EchsA`$|Hu2 zGdFKNt?1ehnGCdoo+pi$=XKzOF*y|#%QWFnAiju!fcUt9r2uo`n772DJoh>?>oXyZ z6OCRd-d1{aXyEW}QI4gB1-by6+bz^@8_i~C=7o1oAF69uUtS_9x~}}=i#PJ8JI{RN zsqh#MU{;)0pDrIfelPuoIqm6RlI{z=C8s6_DkaXFpQH{k3F&hvemVO{VyA^V8xs@L zvahL&hg3`_YgN>NJC0OfS7FlED@{e~oemrg-n^)8GC7%(m}*wpcg>NRQ;YNhL8un0gQ;);NM5 ztE#iJvyErsY29M~y=0SzMUHwVs~xz+wasP2n4F-XI@ap~42meJR22BvqT#axP$mwa zs)wohsR~_PzJ3Yq4qE4;q7uC3x+_zM_6W08w^UJD_h!x3Yl>fJK=q%lwtHANsk1lL z?!t>oS#OIUjeB;Tat|C>zGo7H#0l*>0`#NTV;p*_Mf~~HwK5_<0tb|Pr`1`$jJIk*#DTXe7qUR+E}9WUC%G$ecRhWD8> z#O#UIz8+NK`Sa)fA#HATY@vX0($&+eK?RJ%0EV-(Gm-URBJFxItMu}f)w3k;GdluT zOIjMeWn+aoNek2{d_HEPzm4r7-`V@Jj^0<HA z2JJIdm11f>I5N@KUb8K;LaM3@K|cG1!8b=Vf{|tK3Ttus=G|9l=rH#Co={dt*oW0g zj@@rL!6s|?`clzMd<&1tvzX?87|ffyKuam(;tEI}*ikgw*Z!h_-$CAPG9$Y4_E%1e zhH(Ivd~eUzn>uHfrO<^2l_;jncvipaT&=sixlSS02_9m<<-bp`Hou;=(Or-{HafEQ 
z(OENpyjYp%0PEX3M;OX!F6-`jR`u-Na`@aLW4=9mGJNmA&5q8~JzL+JZ*5E#PgbK@OScW&PfB*QCtrU5uv5O_2&83A|EI_eMLgWp&9aHB**s9}uZ zS7p)A&>+wjjxhYA5yqkg5KD|CXyP@@%+?^Slilq3^eF^~iFgNKKz4jzCJmDT$YcND z;AMmdg~UswEg0Ph$xfydh^qtg&?rK+7qclu0FS?LVGJZzAGk2$LW19`CC{v6b86$4 zS2r;Pa-5hG;0M7Bhn&~wT0)?qTM5?`^!WCcee>p*mozPrKMN~nh(%W$ZB^Dw9nafH zf^g(n_f!QKI929Jb8~W<4#~~DFXn?!JJlDkf?uT$qKh@iMlizVuU_MWHOuh9n>ycB zas3n8UDHZmWf;P#;gQOoF#G@?gbfseb+M$QMP=i{V)4s;?N8XYsabLV{OlWJDBqYC zA*BfbWprYq9$KSbL78LQt++ae+^nomw2j;LP0r@zsfg>TsZqi5Em7?93Y~omdI5%S zI*vm_L#Cg9DTd0OrQ+S_nLUo`FV$JG#kT)~yNVps`WM~7x8s-0TPRUal*%0Yew_KA}%}9fA*N%V6avP{Rtx6y2sRu9-a zu0F7R%IT1OXYV5G#cnq-)Lw+P5=^a1X@pnY^tTB%m6xE@sOApH&7a3)44Dx_MAbkK zQY~#}AoLiLjb7gB9sCZE3}Kd~fir$7`2F;LI#+mmAVbvxab&%mzZqi|7DEt$&(rz+gLecX zwZCK%+3iu}{9#JHF+KL!OwDd4t_7hzsVdBirk};L)=CQ(pIuEb{jpQ07TQGA#qF8x zdfIQV65pTak0uSet$6N%_`tFMoe&tH=}=X|M79hDt7j22q^F$CSFT_Kukp?C0p>Jh`|&Gj-EIB z3C4faP~$PjBmU^yUfvR(uZ4KwD&VC-PUHBToFT=M18yH4P-7MhuSTDcbDZw?JgPJa zm_65&M2kOs_)y;PB6RAogNlwgO5|0T;j!(@^o&oGi*gj2m+IL*gy^s*MNVJ* zaIGq>x&uE$?rsOuD8lXP&rxZ<&tgJ*NTvrT~O@ikUxKeEBjL zYElelLG)ZyF*rzsDl|<2@$C}CMh2d&x4&PgxE$s}vOAD=m+6TxF)}h*u&{(aGB`NS z`Ga?lRhg*Ab#uU6UaLJb)L-w?Ry8)#LS$a4z_z&Y^xKO9r73N&u#wpZ0;N!pFqp)L zP*}N{tvm@F8i-X80BMkRh<3E-?KIAM^Y*Qvl+?aK&haEHIB4S&q3-gWYt|4iHhP1& z&Ux@eWFQwA;?~$#uAPyl5`@k{TNDJ(pWg7RSFdUTeF1_6_9@t`;pFU5-?!qy#2|em zJ)^Y4n8?S^<*O9-=X=Zz%a~4G2^|gRgc`>MhKTj=qieR{(cvnt#cUxClh@3+=+kG< z(D()*v!-XZ>mj)jvERJ(NC?8P>cxxYz*lhCg|@WjC_!!_zzwUQu9Np zQkyr|0HoX4+pjBDMvX^@tqIBPesGJ<1K5te#|No!>=1i9hMnPofhF4NtJ@Q(PX{rS zv|2?)rPF6xNr5>z#&1bsl8>Nqc-i!!vZwPaH7aN5%V#GwOs9{ch#UW77#3bz&Dh{Z z77F7tb;EB;#y~ICA~hYj>(U!_SI+}V8fJ-Vz?DMBXv74MG^X@mW9Ei2{?>ten;h1d zu%hQ9%|86=FeK)4a&nTo07-0Qm=Oj5IGm+y>*%0;Gn_A&!5COIlpGsa&oPx%S>gvh zyzKZV2Eqtze~{w@?Y4}aBXrw07pbS8)-!Qct~e#g$H<%8@Z@<@?F;c!)wf@-TGky^ zL;BMGl9edm>NH>u3nY)95O=XAX@}}5Q1|YF?6h0;ZG*p zkhvjtQW2vPEY58YCbp_x1UErUR?x37+e{&@DzNktoPJB-6m)rC#tMIv26C$KB$3FabA87Jxy*T0xX+_5ZE>i<;$d=C9X`uCU+}+xRWxMZ(I#&f%`L!eT=fe#RUE%K 
z!?m$7zi-+lH?vtVf0E1H09DBJ;WxUs<0*lOSAI(mJY4AgUOo#>gqX`)7fMczxXX7f z@K;%R>g@2?C!8Ue9UZ5sARjWAbY8w8is{PXEiS|2VMp!nHuz(}wA(&-cAqL7=x?=qIP(2#h2?8czk=A@sfqils}Y!@ zbRlA#s&+tYz>$P7|L}nqxgr#@1({@pP+#1i9gOaf2;pGZITbuM?ApDX(4y#@6cjHW z-c1@Lcv(_shveHU0Dy_541K1pj=nkE0#y(>;0-Ao@HH_o2$XT|s6xksxWV*R{;O`f zh^Q!FY9TVEfFHyZ)8PHr#M=+C0@I-n=;#x7BKNL1J;Ow1ostd)Uej<;8~UlLnG&vb zTHfvatVsO#n|78}g+;?-d=-|(OIM2Q(YEc|2y==CsJ-Doo|Ws{SKYRr!vNXQqT`?R zgx9U>EqS3*i?l~f3}n6wiJHfA?lJI9oP{1pC_=~$p%CHhK#D;0&h2DP?0Ps- z!H7K{et+L^c!Q@oh%mH96E*7V%dY{}3dqTEVvtt{xh{3Kd@UWZwP{Cj1^{07L`qg{ zhu}iD!?_zCzMhehSo~%+s5Q(zZP>Wc=es8kTqg$hp)NEsau%w=AWCrka0nwA>uwwR zYWMiee3ARrsCCWvdY+ei6bp$x%IItC6i}qkez7o`^!3p2`K#4)lB;|67_e2$&o~48 z84ZUo0*34UZOhcW`qR;icTxONv$z7 zpENfY`^=t{FfHmyJi%G{^SR>aj~_&j9UdNrv$*B;`drNw%fNZ{lBpL{Q_kn9e!rsM zkc>*r$U$le7Z+D)f=byr^j8WxYn*^#|9;Dz3BEUJD+Dd^PT81=`rWcil^bLmFZ{eEPg zAIXAjJ-O}yivku&@w$O1%+S;a4!l$Nr4zT7j-)?QEk{R3^=1!0J}*GzX;_%*VzOUi z!)UXO&Jo`VJYU0dp79XUmZ?=?46rUHBVKRkP6Abk_drZ+D6vo$`5*Rx7EErS>|y39 zhr^&Fia05eHsxRvAgUX5sfy#5@LWW)ARaq-N}UDlKNy;upF^G&5*A*DK+|w_<#SQI zTifINq2*on%C2Ikr4gFHBn^y=^~^b9Qh%+Sjn(XLi7@@RdR6)r9}}i2z5s|Hqcv+t zZPw>CxXIYCFz&JnM5!FEaEQf7*w!O$WLoK%yE_>?2i=3ux3&+GDOcBYd5iq;Jx|$K0<(0Vo*Vd5rbhlu@Q`iPmeY7lXZp@ z-pFYUZa=_;(NdhgT3j3u4K0(*1no04yl~>}#hrS3IP*&YP)W7{{bG{6ok{7w(^t?b zW4&SgHE-USJ>T&q6^E91L591^xo?--=9 zA@v*speB47!u-6hFqzmv6(l?n_6GcodG_a#W<(H<^a>0FL7*nrJRpG)*E!mDGF<|E zgEW%{JEZ6F;L?ogq1?hzG$(zC8<>1oD(<=Zqg97r0P`I1m2HP2S|4NR0H9_A2nHcl z9Kz&W&Hb}hH}o^LLHPzCh>0;4=Il&}z{DpDyxNK)b!4}*0NoPw9AqvO)KD*;k&HvR zc2`7`_yUM5^eNHv^zT@M@h~*|>MDxFR?6ovp2{H<(=% z?*tqTyF2J;N3}DOqb1L3@9^V756Mqdn+}!-F^;`<7nrBnRM)Mk_jV~}7}6q-I*bo} zpamYr(67+VOJqR>1v#Q}#iXTK(Z92YbdVK-O_)M@0OV-mw}vMv2qg!&IRJuRZCl$a zTzycQR07)##xr&I>}kLY5|bDB5#zb*H&yRf?}bHapi$DZa69KZ!;sV+e$xG67aAZU{3z^<5|p1XTm4n z(%v3&_wLDhkLB4_)Zv=g)#$wyZ=4c&kD66#F%wGqos~Ff@JK zu!q{j(lQ1{TLBRfG7B%q@5ACZKe75;B)^8BzD_PjL(v^6wb^YOTN|D(+jyC~rmc?d 
z*_AbE5x)KAO{PY3oKuEd;!-Y0%!;ll^O0pXaeH4|dZ4My{VU&(ABzjZlo;~Cpbn$abw4j<`;Nv_TvtVij*RQ|XR|!ux(bI7t>t_ji-)Me^ z+|h!mPT+JK2DL!!^O^t#o2_{BBC0h9T9X0EL(6j^$kR&4|y zRF6sMW1pX{Nr4BJOw=jvPb5!>W?;{rJs58Zfdy8lK>R+~1B?5|sR=WR&y_m2kl2h) zU*z_J^b?DnB`W5Zw}u4x3JuD*(v@q!ZF$um;A+RawZn-L?akHF+OdS<=%}0as;K(a zl%$q#^@X3|@h>-j99W$(?Jjzp?~}lQJ|CaT6P@RoOFk++)i`d>vAJa4^5%`hK6f_D zXq|C(HZ>~^!)y_{A!|FkAlw?$)@S)Aj_vCMi&H@9u9cBthr?0}LnQD$g<)6DEg30E z#U%yl*RL-l12E0=Cxj2e&n@QuQ#d*N_>0fbdCV1%D@XdSsF0_Ih>9WFREwTxPoQmVLpw?) z{c38~hx9hCi;YWNIK<)x6=tiqcid=`)~AMBR+TD9WNC$N6lEWuLDk>V+qP)~|F&dk zF~WiuD0_1eoo5`$jR=%pVDP0vUPqq>+X9QNj=;LZK$kFhtROcnplLtduO@#5m-RqA z2q)*4H^fq1FfNTeG5$7d;KGFq?(h}sonQp#$c^Lw`03NUIH!#so}R?QNb&s)oz0fCOf&PS+fm4d`McwDPOM=|_L)Hs?mTjZHJZUtD0G@A74S-hMzetu5 z=uWQAI#P{Wt8jmdCt*c~H;8=#%{M*w^Tk-x`qV0P`NRJi0Mv`c;HxFl zV8oX?rbxSdrVf%0u)EvpzrP?Ba027f@yk`FDVVp0Z$|=ys+QhClHR3gf%?ESq0bAr zaRWiRD52nAMoVPOFs6-Ej~FKC17C4Z$jBfE za===^8MMH)&~58tG82e9uj|Z+4NP@_J_-MN3o^%8nukhC`fk`Vf`HT`cy+SDAT2e> zUIp2`4ogxW^T#Wtsd@dlT$pqm9nEsSD8f%ehi-ZOx>1MUNBE?02zo&#blT*^0pYi_ zSWo?DM)sAdy8v6$$EFK$@B{wS^8q^B>KK@U#sNo3ct0}M3#<9QVckWHjem)e`(9Xm zeto(~rciF_XP&=$l^Hr>v5;sA$U18%>hN6=SkZEQ$I9ZxD}cyKWJ_Q-H7DVOszP%K z=|v6KsKDb;7js>TOmI*wrAdQNj||Piwjg>^_)pXb3MP`sTavuRfdxlY3I&e9hWc~t z`%DVoO;xrBp2~h(M`PZWsx|&=MU-dXa)UXJNJMX|f7T`)9bMwocahgC8 zM9?7jSRikcd5*~Xzn`%*7@v`PGh8-A;k)P7t&MKS9iF^mq4pevxef8Zf4s1F6eb^X zZxjSC+s41oo3jP;t_D(YKon1CWz z;}pS_Nk+afax-1IFsllhG=leAG+2amWLa|U5~j1s*_{6MOYPv*gMa8S1bwpURRIGw z6o(r0ykOXZpo-LDG85H6!_x9vXKM~xD7(RT_au3Ecwk};20vtt@`~}v6^j2JWEaeO zi4lXdQy)$dy%!a-2m`Vtn0sqNAXi`O9J3rm9_oZAAAWW+C{7pX51y(AzPrJW{41S{ zcX5!@JIGHynL0i#Sx=DbK@f@h3CV&HH*&cbs(A>fCl54alA3VC1Er>OIBC61hMPgSGH^(ev{1?z*P52Jb<-88UDTzOP*`3Al>P43m*p zR9$>z;#kD=G9w}@beBj_P>@Fcp?|WZV1{g_z)6Z=P6hH0xw*^I(vloBm`IC2vXZF) zYHDh7!5U1}1bIU|B>)ou$n8-DV1=-VNFO}nQrcpduIMz}QR-4`*RH=GHAg@K$y)$y8-ICDeDN4v)J$A4V!lun3Aiet5?V^M2QI%QRLwR>k>O!`*g%huJV z)JoHt3EwP*KYjAo9A2U4f{6~;ir5rsls~~>xqJ8SV!Jl6EJ@CR&!77~oa$!B!zhBr 
zLM;|rd>HqipLc^3!aqcdM(dBZn4nG6Sf*?v1{}F!yLdW`kdo2-grttEt(u!QZR*5@ z0FucBy<~^)xX`7e-LSOSuEh{0N>A4nH(sueO0`iNuZO)DC~pIKpCbg40r?!-yoOA8 z;))OWp~kw6U0*<-0PD_1Cn zZZNt*?$*i4!civEI=e(_(OAa;Jz!FA{OjxGj2sf=9v1z|`ED!rM7^x@dRb)dPMrPX z>pU|FFFuyP7=jSKfIxZ!Z!9@V|F*incc~5GdFfkz?qh=iB<-U!Mju|v10dtej zw%FI>KH2@JL$7Y_{Ql`Y0MQ`1Wp~V8xpIXJn*i^@_0@oZ4uK0zE1(KM5UN4C2ULVbgLq9N zUGxqC{1Jf{y)YRjgwzztyJIVOM_jnI9jyc54ajGKU=n~t-PyXBdvu!?pAXjp1)wn) zxR9qs)SxnVA;fTfjJl-${{3W9NDz}g5C!gP5tUtzh^JmoX6^#<89% zDq_bTDWAI58MkT=dYlz2R}y}hxJVRF4);U{{28}+GF^T^P(J6|64QfIE`E%GYmc)n z@h1b()YL?({s$GnGcbm8L+*zmo|Wy}DcF-$eWt~zt8n0eeK&a|K8v)ROYX^uVTzX_38jhP;Ss~btzWjS#HRL8FFvixOo+P>u z?OjuI+H_0sjfHQBe1f)glecq0vQfT?q$S@aAJVz=o)58DEE!!oab z*>AiFDMenO?GC~zfaD+WNIw7VL0jvNlG&%I#cy+!8q#*0 z)Py<#FilVwcodcAIXV4pdAUmpXC4M=$XZ66APc;AdqX5^iQ^9>w>&Th^(l> zc`Y%NuY}Df$LhzALjX*P0|9V&5F5_GY$tCq{O@5PA!A!JLM3P_axf81HQb!hY#deV z!*Ci6>Wxl|4!BP&sNv!Af)ZJ1qsMYY5U7ck5eLq^g&7*yNJxv63jRs2s<`O(?_u%( zL|>p103T0Sgk-#X_W^Y;DluS=#NL{Kz4#vZdH?mRM}Dgz$*f5PU|}KYGf+Wdk$}Fj zeV`+Ut2In@QM8~PDB*2?Iz;He2l8>nJT@bj(Hrvi0+#{7x_$PZ0YtwFE%)EE{&Zy$ z6LZJCKzj7fFz-`PdvGkW7TWy{+qQZ9JtnXuKESSlm@q+&NuXqW0l2>0g)p}EmWnS7 zc8EJMYSGB(XrN$Au>ta7>PAWrqy{p9M^+%F;9SrvOk_ZCBo1d!xpj!&79r3#dYtOJ zV2O>n49N_^!*PmX%egaow7YgumA5AULJFL-ZOcQQPl$-nZWX=T99#66(dQkCoo7Ko zt$;dGb4`-P8k)GO^J4U)2DxY=)j~Q@@x^g8XS%wguQYx1SJ?Z8=GM@Gz`zRamM1J`0EeQGQl{44P%f$`18 zgUcGS^@QyvxQLzwDPSA844ltcvJn+t?S2L#n<8lAV`D#j|4s*+7Xq0P(gTe{tpIUS zL!#11kqI3)5K>8a>cHF0$eziwY=1SMO^Q7HoCx(#8oL27x&d4W$AT-UIUkU7T_a(j zYyMRi^%V7-&x}Tzozil>@mXs4*+P!07x(Kg9zIfe57@Dy`R4^2rspTuw>5-XH9H!1 z8gYjPg{X8Vhu=`s`s^}ngd~~jpdbEE-=wOX$5|f_%O4^@pGg1O&!5@VHMH&xokE2J z@#+^YyadLeR~U2DCXH9JI~{d0EzGz4jcXu6!8U!5%}%OMKGR0cL%sA_@+y>j%~2)h z(wn5WFYOP6s7>bC;U&P^*mGxkIEnr09~wY46__UBc>{Wd4dWf? 
z7!OlOaEAdWf;%9;j?81WJd%ANWzIUrRSeb?Os`b5tfEqf>%VQ@yycTYfzr?1=NxXQ z#@MiJ??EgLb-o^Q|Eb>*-XS}#Kfy|ewn(3V=b|=8&_4e-aQbrXJRo`?%2$YgI8L7e zIAm+TTNg{mbD9xm4*Fo*Zkn5a`eS-av|z_(`*>lM#f>4_qZ~&R+%90Ed24S92oz$=i^qNQ=1s~_lW}fZ%tc*U732yufL`|y z?JK%si$lfpT02cl>%S{U(Z*+zlLkTr!uLTcPEMMd@vm>?=h7ilA&nl~C40ToxVhm@ zKrFAIphL%oC-!rZu~yOX4a*ZQrmn@`l!RD?J}9IoQ))xY^WZ>|MI6E!VZL>`Cbi^_<64>vJ6_5LiJp>(>8dINX0^`yq*A z9}*)2wq9Ax^mAu_8_+hZBEs|0dW~p-gL?O*9o>r$`)O2LMyXC89#76C4`s<7aMTsv>!uB(m6OEw&d@R#cYh;% z-`Ob}0Bf zz}`AhxlVj;=x$lWne{C|_*|GpyCcBNtpLD_Voh^{tALR~T*duGV<6-%u%52vG5m}| z$rZtkh9K~VbaFL0d;P3Kr{s~jIbP55QELcDN7kc@?X;Al3(3JrROX3^8oMSkGgimn z@eFNmD~wP1DbQrU7v3r-Hn{GeMc_Jp7+g1oRjj}-V6L}@;tFYeqFx}7Hq1p59;>{VT6~VNbpl{y;jkx8#?1e98-!@!7?(H>0#R zyW&;oZv>vdt`o3D%|_97$=*+FAFUV8yUv}e<8(T-b4F_FT6i?&c&Sa^Puaeuygn3P6ZA%R~!m+3zp3v zT-qhSF|*+!jZlph&9ujn7Lz{Ju*5)*=AZq>dj>nT_lPU9ecq{w1>(5&nPRf$?ym1A zY4kry_u70bl6gINLuI%&$677S^4lx!UrBM7W4@;?&UU@8rmmo%IV7l7wlx)ypen{e zNLeQD1D{p?HVG3G*?U_c?H|yAU zpKzrwKzfSYKVj?z@prT8M?Q>71y(|3vFG=b;taI`#o07>dL`&<*UU^|2&~JYMTK~D zv-sj1FJ$D;_9>I?nlB1%diC^|q^WF6d0G%kLvtTHHt4=f2 zAEeE}`Jc6?QbpiYoIV83M+Pl{BHQRn&d9)~c3hW9x^I?1Kpv>e|b-HxgsO10lk-@E7IxKzZP_SrJ2bZ=@($xAFUsvU?;V^_*s6mGXk@YQ>3 zXykdq#9Aw85QC*$VCpG` zRdE%gxUUpv#2b}fw%u`+_e1I1KAxB5ICwDMAk={vW|KB#kb$vN%VN{M#LLTo@rNf* z{vc6I;ur^KmDDQ4&!6f^y;JnIzve#5kPo0LP~6@_)`|k64AoGwew8OUikINN!Cb|T z%|VI6(b;4Iw?mR0Lvu&YUa*~nvjZ;3p}jbmhNH;?0yAj=rXzfOpMhmJ3Z7Fb)$!0) zQ(1WA@MB>@g{q5#=M@f~-WR5n5iwP%Zu zJ7$@j-P@TKO%t$2tnGPuWiUKfN_L+V1knyDe0S_pfrse%SM)f?o9xKnr*$8cWkKvJ&=sAO_n~S(DCDXw`6Ax?CK%q!8RWM zX_RTbXt&w*>C-K+1d+TxJA!C$O{}7D7eA9BBPb|9rg_3DB?3?ngSpJYR*;CKa|q$O zz;(?gW}mYd|FIZK&AUC!0_lc!@5i$EH=>-%**$aqF>IC9dC4~>l^Zt4HaP{4K3e=Z zs%xvI;WV@Jhxj6Wo{5Ni&Z<(q(Z!NG43biW2vID>(6O7FBfTYGuIRSD; z(bI<3`VWv)@mwu_7LaQScAw}PQ1^>XW*G=b*#F{E5pvoWCzU#E%GDrNbyTuvQm4zl z?TX*D{WHVR`}ddU9SXgt6Hw}cmm!Wb3oMH6|IeW!85@NPt=oOLW_~-V-^^mUj z6B`Z#t&}cp%C^}gK0kIostfm&e)n!~pgJ(1n2j2oy^;F`%5=4vhhU+SO`DEUN}U?1 zMN0n22!lpmIkfib{j(<>Wn&Wc{gow#+cwT^G^0=s>%HdmT1;E#LV4ltc_#gPfBD%n 
zJKjk6=k0M4A78R|?E{_|?|G+O>ittL*)Avj{O5m7jE+1|&|aXX z`haAk6i)^&01fFX;A;OW{q=FqP(l{7!3OFCkvvOp;(Fvy?W+B`*PUkLYO;G6FK0yu zkudu_MazaMKC)IRR4#vrc~m#w+}kYF-Z3i5Y3ubLyW3yB9LnEYAG(I_QOtq9qo!Bk zj+;{6op|(cnW^hObGGIieOGphk6Q1Ty?%F{{{3-;mO|;?67H$2soAIg+n~5Ad=n~h zYrfiH`9hPGsH3Kfk;mk)!AZUR%p-4i?-^*Nec|bO=7jFBdBb?-Hf`&MZqtou5n2?` z5`=~T3+%IdP|;{T0%o#Bw-RjfLGXv4ZfXF4jl&ep5lloPLpV><#+~SbamD#WCgI?T z15#;*{)uc0La+i-jf`X^4LN{!P7vkmz%QCrt5y+66iF|))+@HICf}}wumFu*dm&VR zEx&(XFzSV?(gD)yOMFKE(G)`Oc9e`>K}P}hIcBMlL~I8UrJy|osR@DTiAw@PG?_vH z1gQ-q5E05E&F5H~3|a#YJWJt?_K;BgNV?C$fCwTx9OOh~hcBdJIEt!7F45Qt-Jt{< z#abKTRO2Q*1tzk5`1b8w$~t(ik%mtKECI!VDfb76i-5mXnHU@il<965$|K$}L?s9x zOzw;KgOu_d9pJD;jA@BGJUT@IO)h#qPxx{-d-k)FC-^a(V8amW<{f?@+K`(~@E}&Z3ZTGjgP4sdpNV7`hwsUA=V&M)v)Lk+J!3^&RAHyo zlob1p71b4U_H(H>>Y5s=Uhip~?t9D8udk$8r&i=+@j9|K+0kyz#q}}u=_|CyP5Yit zq{AP!z#5oN(XqOQ291IKT;W-_&1w}NHeS}5&bm}-dtB^yy3_PKH&Qz+q))1J?HCbO z4Gs?0vEla2bL6@|G?{d|&^zVjp}ZfW{*(Rb=UOhCF3Ymkc6idV$zbv0`gkXaig}Aj z@h^7*uk-$V7jVDB$m!FkGt*zV54ipk>EB@03}ju%IeYG8%n4et%-j{0<3$eo>gqyi zeZnp|Zr@q<@o#GQBzLK}tdX%YeFx=zTEsr#`U}mAj+;K8@&xsq`~3XfXOfmx$09NPI{4+OAC@AR_0l4wVzSR{_E|KF`hH{Q{$Kl)d8YALYHeRYcbj z=^5Gp28#Hd0xf{A6CLU=y))s_kgpGe7-_q^{@8oM2!Z?XXW#Ni`Jh-u2*xuoZ7js? 
z^$88-w(?U$KgIc;9fyf73Hc|%ueYf*3KTWo*E!{%*3m@3QJ_7aAn|~twl>@5ZLJT7 zV*bP!2Xc?!O?l!lHA=RdGyx+L-@bx|#n!A7cAhuJ^G#0_=)2e7Lkm65;s5*7b(xVp%nM{U8kg*#elLZsk)2qZh zaA5H{(wZ!NieE~ik8&!m2p&&Zh9Gna0}tW_x(70yYW@24I1h*+*jmMPL6Fga|ADlE z<13UwAi0Uh9ah+J5C*`83c+6^pvP>OJ?|6Sb8Xob!%tKR6>+LlMOhgk9P87J9}w{q zV1H80LTV=YEi$gZJCBUFf@PyCG|G@0t%Mi>@;#rWu|~McH-E0uWA8(cb{TvOcnolK z#31(dr4LIr1|}AQ z^-ybGP?WRmFn`W>eIum#_kqMC&)v(zqX=E8{D$Rtiu!OTWN32HP5wuK1Ewxc65JA* z?1@|mPNO8HARIJ|8$M{5<`942pV>C&`<|^OS|+d)xLDGV2(2Q~njvT0BPNFPv(H(X z16*F5U4Xt#49#9u6)CORY_ulc0iQ-@f#G2h0H@YRSrvLCfUSgG#}P+b(#y)4x0e3l zq}{FE z!#x;lXhdMb5QRi$WSpdog9zWdG*B!rA7L-$8=ZL;!5sGtE7 zuSW>Pxg3T}?;oiN3(Ja6M2hGG=?sK^gqsDsip`H(s|y3~YxODYL)fm0=6fhe;H7Ho`H7nAE(=O&~xlOW`7~} zH1pLfS9D>|Nw`*cHk0WisNP%Qgh=usTw~cga|7HRzAJ{@pG#>dgdsv{1S=6TrUun5 z2tr{b);aYO<4IvwqH?tx0_XoB$!PjSOwWa$XI_gmid-;c$of zuSh2i+(*ge1!x`+6sIs94U&2nkOPDv$6CZ?#=VU=;PJZ(8{Z88?I$4kM30nm{YkBl z-)Ve9G8Gm5S%j&@3^#g_f0UZ+g5)Y>aFfVX76=)e5faEngCs}$kH)RNInXaC@;>$V zZ$UZ^CjJD8n-~}bC8#c%nlL8@486!-ah9TaH9s<>oyl>Rm!D|P_lpL7FhmAjKn;R) zsSpXOqPF%SE3SK6z)|9d3_rs!p$GYQ$?St-&1a*XDswKFInpbSz}$-IL$DvYlK*H7 zsJ7w0bEKN2?LlK&8Q5mX9ra*%${;Wb&FdX6FS*3_JuFpwea@22ii3+Ng$bdAxR~!1 z^%xCTkQc1RU;u2NKT`UCgw2txe4pu#pQ?(+uo5u9(1^4UojX7{q87kW--7yG5|Ycq zK<$7arWDYm5rqyc4)KKH(?s49#t;mMHQCpLT@*2<280VrA!e)1ap5j5OEKC7wz6bbwbRQGp?!|NaN^dEu3DWYEGLGU3GC1s9$ zkfmfI0Lof2Ms>IS&=R5xBiF`wxcO1EE9p;kob4ANZv61}L`F`8O{m`1qup_8p?J=qVzM9c~w#-RaRN@&TTf zP&EAh5xu@kU4;PL_(f;$~xM7udtg% zo)7Z9j;Aso%v@I(&EU=?q1(QH^mLr%hYs5Ov%<A1?)z$v|8hh0Hsf@w`Y&9I=gso$q@LdcfV6L>Yy{6IqH2Xf1s}KScL} zE3F;&a1Uw2{>9(d!{Q>h^B5c`g@;Sr03;Bd1exMSN=JlfsPz58bKqK8gXY@Jwn)W( zw;3|nwrlQd5gWUk_@<4?MmmsI_M~tM_uKvB)kTv@Iz_-!G zw$QJvTx*_hT;h|sdBfxUIM$cUFHgR2a}oP+dNp%UR7Pd^1hdYq7H5~#w6y5TlPOEr z9@r4J+I{uOd!Zwn=wEX=CNr;=az3^C+{yc_<_?R81CMMtvgybZsgJXk-(Nhj39%cP zEoy&Zl%zSz<;=Rhfb!3e^G3dhOa8w0y~en#>LY^GNx0b>Dkb38RkDYT*m-|oZ?~Jh zb4fcbSru8I%$C3QeY)OT1vg6N1-^%OHUwc)LCa1hsI~CsQ1UM~$N&{qL2E-Q5;6|0 z^q82l@EI*be_gz{_(Fzs%}NCuVE+nvj`Nk!ZoVuQM}jy%*Ck%V?Kn&W!9(l${ 
zrRvLiGB_``swP!(t5K^LMeGGD!zlc8wEE1huy$;>%2I(0rANU&PAYlnab{T8+K733 zdMd%FOBshH$0L0lX|~w+Lnn(;!tgT>9<;SWdY+64&dlK zV6NjCq%2Y$d11+v!FZU&#pga@`(`KO2j5)D9KO4r&$@IM!wU9QYWGHsl$f7gV~kKm z?;PGN;B&VHys%Wm=st1tv#U~6qF;6DI5CMkYC@K$-GBjIk%(8n4a8^F{R=8U*@+=S zs${Y$FlD^SRX9=I#UVVMlaMi^X&4rc5;yZzp98q>K5qCI$@FK339g?GQ$nA3@9x>6WZsWBq|Ac>Y%O_nn%YH9jRA$Ab54 zJ9Bp_WuZ6YTa@JNM0XT5uevkh%(tCy2iv8@ASE^Ab;@OPoQ^s&IvOunrxd+dB^(Rmd2qBDk?6nM8aax;jhNQ#&#Mu6C*}mC>y%1^Jaq)mC2|z2w&KR zg)bt3*lJHvVXO8`cnf|=Y~;&c+g6YLmAL27(c4s;f@j@H=ces6Zy79Rr|LU~YT`G# z#jD{*s97hz%2^u5@0H*<_%RA4)#TSjqy;MAjFDz;=_&II2}vs4WhWyqKA!?V|2Rmu zZA_MT$RE>sbJ+u0$3y^?a0%~S?=*E6D0xzPqwP8Ly;ML+HLqRT^x`%$5B#|ih8s)E z$yGxW5e+0L*|U-7<;uh9o}=Dh{Bu;MSE=^im{4Wt+U=8YHCQj(ao(18+mTah+BQ*u z$wh6@+bw}CTMfJ!ND>|-`mwkdA)*S}A?)AWb~_%3{t;_8n4<>P(PwkUOd?n1yUKUR z4>&mW_zmd1JzW}XZE;K8D@!vd*Z8^PQK#I31RZLB)yqcI<gR5Ii= zwl7F|t&ChwZ4kqway8K?9OF??(#S#}0fC^F9#Ti#KI!84>%1w*#l}`Z z9*hX#oj;`@>s7yTBLO2O3W|%-PkT^UxE>AcE!%h@s(CJEmlhZZS2A_*Gl>xIJ`D&k zC>0%j$bY@!^9#A?N4v!vf$12)v1SVslQD1ObV zx>JERSZIv_Sf~sQA z7oxB6I*OMGI3cJ*FHq8LQBT92eF?$x4Il>)eM5l;#l^7dGV$$0y>)fsJQMt6xTuUv z74!V>2?q#oP1W#eFs7LtNWEt_S-&_roHxF3K3Y_tcXXtP3F9ljZ5jAw)Qt^eJAbgQ z?o}W!_cYJ7{IwdELzliBlZ4a7Wz_I|yl)YDdK=QI22IoavFex13~h{5`3go_d3c}m z8hGV@{#*-qsHC{K-%0~Ypag8P5Fxa_+<;He$i=rfP^!^rmsh<;*>Pe#OW7J`T(5Cx zsb2L=9Fs}i*m5S3`y$Z@0HN|kF>fhQxNq47tHK>QX)Fis^(u6Qr+ww- z8sk$vL)B0ls1;hA%?nB1y-QGGpf>B+%ojfMS5;2EiPBV^y8E}fBO1abC%jGbQeDEs z&Ggqsq9G&bQ_GP3mS=$n)!%#`9i8UC&TJM(<>%ZOLA{Muc(RQFN#%6PoIN{a$hdyT z(LR^^7y_6hM%xzmrF0=d@z;uQ-JQFf@A@g0AQR|$B7ba+CF2R$W5+%i;N>l4@)n1J zFi|H@8?+Rf+@C@B6bo%1$2+!m;Ojt6qaaAMGp1^!Cv|lJPt`;RhqSEh`1U7$evyEe z&iR7x?644Kpr>!C69@Z;{jau&Hum|TPu&*MGu@q>QD`TKB4^c-M^!b#Shao){S%qD zlr1b$$h8;1g;Nt%M-qHvkBZI$lqrYQ;n-+*{)^{7 zINpR%UmTEv+%uLn_T|gtNNRq7if88?owZ)IG~ZK8cvPY!VRvH2amoF8YUN)CJKWus zZddY~9laL&+I?jhovbfF%)Qhu$Y83ms#BqYky1i?I&zJwFo{*Aj83C6u?Q8)@SrHe z0_o>JwqaaDJ;t0{(@l75%sZ={BCp&Q#<}N9kxtEJ_Kx)?X<2p%39&FjE{^G8(M<~z z>GcRu2p&oJep(jx%w~p?HC(3I)Z65HBli8e`H5D0wv-z05bHUYeS26h)JT)=8l%oi 
zxP(wq$%$9$F89x~j^j4g&d@xRv$!xj1E|R#j|^Br3>qdrtWzY~x5yFG3n>8D(G_Z= zB~rK>{GTC|@(}kNvhj-Z9M=T0s-UYIRxCs3))w9d%z+A@BaFvYx1H&Z_J8su0;jvq zNak^f3;NSLI)4Gg^c8=%;n3G6%`d|Bt1awJ+|`iE{rk16Et|!F*jtk zhqGVMQS*9wV_eMIqXL{9Vq#j5$HMXG2f)yjn=Kz7?24p!e{r##|J#m1&JgHx&nZRi zNK)|$y*sY1{^RD4tZ(lzeUO_szRDZ%XWD)@jz^U8n4}s8?9>o?M3|MA0q590b~Smd zDLf^mnfn|rijGM*5|zK!Yi<#dte?}PqoY-zkfci#Rg#|?!W$O433UD@(M$x}EepW& z>3*tVQVx586#MBB8n35I;-2HqK7Ibyo?pXgrneMsXnWi~g`sC-9~&E<*p(T?qcPwd zt2$Pq}^kh1*6UYYMOcgX)l zzokW_@r~m>_p_3G-7&U_Gx3f5>8jtZ-eSjxz_weD#Bg4_P#j0K zIe^u3zI^cgfA?!KCukaNj23!eTj~}T=X<9|ZZs)kp)ejVYT@_G$;I{YYVua65UCYgluOIbjIlS#j&y=NC z(~N)Ze*c7hG>0i#GZDzRCX2m1I|G55bmJ65!_89=-(n5R{-h6+Cy|AgR3UgJ5i2|=#LdmE7dng(doC#e?Gx?gI66MeBqUtVU|nGtH8gY+sbMTq1>4_YZhns+ zza&iue6KW2-bq4@S&18EiDzX9ys93yEfBrLpltwee;l{Q8`mENjZQowEjs^0Va6H@ zlaAx01<&A6z_%!Pu*D;#^NmE-LEggC!^0BenjU{O?ee|eqT$#SJ+NB}V4d&ab}ENsY_{B6PVP`?@nDV+tUpp~f-S&x zk!KLOxJ$Qr&^FghubcfA;CDrgbbulv0pwW^GKm?eaVoQI%;gn>0nxfz)5Qet)TK8& zo%)!!XS-0ZfWO7?k3|V2i#=d_2sw&;rC0{=Z8?(1W}*7{vE3a|V<7K7<-2I}ijTP2 zcF*)uYd})--)cPt5L^}&K_$pp4lR;LPJXnEF;{G;T)#dbDsqb7$|72Bdw-$&d*88^ z$9tCOEGk*xt=jCy%3{2jRH*IOYU3uno#rF}IX?N(tqk_G3Z4w5{7$7*DbsfyPo=w* z1HP#YV41)*C++8LbO|INtH#Ju^&U71u3X3X5>sMNg5j79%EOAd2$y1VY~#q$0q5ZN zt)l`kdnzgyXM3;dqS6KXYRxT0*;_+G;f!|h*s0l6P zZZk;;BZF0UVB8Y$>z_El_3<&`QIV0g@Rl$JeWR+Wc|&|*DgtxHD!>a}RnxNcf3!=i z9xQ^WIXn{8App1mVboFkshf6FEp8-T?6k@f(=U2XCeLEPpU9KT-l8M!Ok%c`@Z2y0 zfpb`DqV)kcJP{S6^vA7(k>nybd7))q!e5O?NM0jjdOE!j_3BkUnw4Cpwk}mrM0Ln7 zMe3`gN2`JD>mqfewY+fkY8qrdJ5XHFOQf3dwlgse)cmx0P`rh%5jzCfzg8Py6CEpZG#|UQDm@1GRalCR^44fPeTs9h4KBl;H9hG z38J8;dF$S;+s#X`!uTs&maCYVy&)|VW~L97b_52Zc`XHL700_yKFZ=cy%*&xcjJ zD}mhE#h$+rQFJnKs3vcLzj6=r7p&0u29vBuEZbwQPE{}-e|@jYIDF{7v7_NekzQBf zoy^V1V&3;uW+ZPEu_)^kv&&8sNIHM!E6!tqUzxF%cUSK5q>I@G)RDg!6`z>E367k` zE5@m}+D_xC_GI=Z{lLeM)ft4|6K18Pe(CU&zY~-oXW^~j=FDLx)Iq*ehIV#0+rio_ zwWL-(Ev9X;euuQi8U}tNRY}u%(AWnHqc~&{&2Byl|+m zv0JYpu7uIC$G{094m6nq_zHq&qFG9UX`}PSR6op?}CN_b>TURHg(QdzXQRd;6 
zpN?6MIKj9ZZ?|2k;z}A2;^*Z(Pof-9K)=vXDr(lR9cK50xnA5gVqEqQzIy!N26rPy zb61|faO?4n0g-=SQv6=(uKWA?@u^RYe_txN-2d;tOsoIzzgzPk^p+y^-{|c=+{eFv zgSGtd%D*rFN5An$P7%`G%V$3|bo}$uzgL`b3JqRKV>|r~87HwSD*i>MDIBKY(Ru)- zMRneM7P#WTu7B@GN$R8j+++&ttsf%){=xr@8#N_qjRQ4?%wHpd?3@>UTR_GEU!hjxzrq_uftB4v`H1*%1!O_s&%^*k2 zkTf$sjvxhhxvpQojv%*g-NL}Y(ERUUXJ_wZAEp@heWvN;|$3B1l{Pw@&+mt-&rmD*yZt3&m)ga}g^!O9USD(T8X96D$wLp|{0NdU?BeWnX9w&*6LuA zWv6dSa`GFSx|73=$)4a~Wo2c1dwY1r!^5LV?#Q*N9w<&-td5<9V!3ASEZ!`6B8bLOTE(4(hCa<>+9<_ z6#dKYq-13GZEP$$efL-1#m~>1I5};;xjJM81O#U3xB16?BhciMGXyLxTHTjFJm(o1 z85tcNy+4qUDe~}HRTbaD#6{%b>brOEzV!rOE_4NCW@bi3MNO7!W((UBI-_-<;40FD6`2PLDmT0wU%hPu@%Y4yBva8E)+8+NPzjlqDo}P@1Y=4@?q}IUP*m!Pql%IzO z&+QgEDvkDQ_nSHF>gmZU#iCBr@3oGZWn>m7Cw2Ap|MZ4q`8qMY-{s7-do1S z#Wk$4cxM{lYF%d}b8*a?sUhuu$hBaCjgrI0$6;b>I+(~yjC2|rHqi0HZ{i-yqVPR#p9rt=NU2Q(Rx=+Dx_Nus8 z;4e)Wg3~km;12ytcOX=+Prs z^IhC*?CiEv75a8|c90()uYdeBF+p>~Dx~10>$mnMhfhUt8BghMd8`g3R#jCYA-8>Y z9@yH#Djvzb?E^94@`0D1f1$}KqpK@WQ*&f*xj!*6F^NH{qNvEm!2uh2Ra3Lr?!^-+ zzkhhR6&I3MSY0h`_wmKv-kwD|Yxxr0*Az#seCeu!g7L3kzh=v;sjEj5&|)G+t?t&4 zoIE^)!h2r`+2VTtg>`CSVc{%BtMm!-1jMAJrKPjxn-_xjH8eCNcn?0kRGyxmCdaiM zPUkBuEQHwP;^!yC#=ddm#**dg($doM^1ePw*YbRmliqdx3CahD_zsdJ^$@HmXqboH zIbNI7C`d&`MQs_Z{q*#7uDJA!44&)Pqaq{sYa|2y@*+!pk$4Ve0}48xf4An4kenR4 zixjt>bAxeT1_p)~`w}GM>a}*dVi^K&s!OrCxw${h%h{YNm6Vhe6x_XckELBL5<9%t>IPLk^=t&Zx4pd`X19oE@$+YLWL$cb znY+Uh2FAwp+6o^!R>8hvOn3K1nRdRSVRuyCu)UcaT ze|fK5gClit)ml4KgCmTo@7mX>q9PUqijLxYUtiw`_V)J~YZmTWS`r~k9&NLeaGv#b zJ0zsEbRDwF{rmS{78TX%DnF&eAtYoFdVr0{T3YtNya@{K|2z0G`zOqb0rLAQ<0E(X zSCy4tAS;|6?Y6jlnVz3NffIcH{#^_G@ENCeo`ZwKA!Mv0B~=DrWuIbdiujV(ub*tq z)-8~eQd2*)u|Y+QYOUmyl$4B&sM4+2*w`Y5*NV7kU`mLIYaxC$v&FU-d-8jF?m9YF zgocJbe?FB+D?Xn_O+!;rRYicjxb@iX-aS(9+V1WkIIWtR8hjK72S?^?uRnoWn1zL< z`+C9P@yW^P3&EG->?-{3)uj$NxVUN%cIxW=sNuxYLqkK=)zz>RX{G!{7EIg`8tq27 zgW37{JgI>6+DVRmo5P=Gq{%aix@_xGIc zbU?m3Jw088bq_P8Wo~YMd2tT)b9b>vAbWAg1?stnw>N`K&=Us-NOSIJG}?5XPyS8J z%=mn3_v$(SOA9|NEiAO2t+j?IgWG@<={wv@pr)dNy+BjUsG!i_9e7%8(iEpvc6xlg 
z#D^vRLSy}X3ad=e+3}2J@W#f*G9Q-qlAW@R4Fo1E`FILGn>r(?@j|mdUcY*EywkD! z`}glWnPAt44`W;CazY2l81X203nn-4DET$CwPk`YB*kbrSlKu^gHN|xL}<3>8tk75 zz0@pzlgVRaJDPJVB_%~rP!LAhZK1Q*^yy7mox>!Vi#GQj>(?KLhn)rzs39ul=USD17I0;yrFm{nSH)8b1f3i{TzV7QU^kL=>v8GFoev9q z1^Y4BNrM4r$B&GRj1(0W)z#xGD>rBB?kgwKOaTn~nn5WlB64;xCUXSw5_}n?sv4V; z@(D0fcv#rdPqUVg0ag*H;oAAqT|T=D=ch;U@$oQXSOhfRSN}V{e=mkren{gdX%XNS zd~%30yhTY*pPQ8x#}rdp>11WK1nKtVXjj0KpdJy)dk|9O0Ex`g(-YPLygqs)7^^5G zDhg=J)7`zcxmir~>exOU@1=m@V)PL;1-!K#?p z@I*oYb;K$g!+o zk-?`VyQ+$oGr*#yuZ9*+mtm2bl@d=~4xfhu3Q9>yzBN%gck@;qHDTj44~(Eq!F@>k0~u_HQ{IgMAt7XdFa!)apj&^cOx)NaL`~R9I4i zj+djy$M0TXP7pJr{%};2G+S4W=toEHJ|_s+l|Vuo8iZpN<#i6B;Gm$O$j?>LPhdJo z;$W>k`sINjA&RBtIQA*oFJH3$%}yR(8+iK`pFy&SO<$POpziPAzhOOR zx9GGUKHLaFL0ef_iO|PDr_l~uo?a`$#zJKEMLAP8`mQ%y_5``0UI(l{^dRV9L>!Pc zu>qeDp-5hF*l(kpDCm@8`wA*v;MP5$NsQW_x$(32@=uRDT3eHcTx4ju;#jbSAUQyg;I&9l!`HLO-O@B$=Y*41$m)xo{QC0^ zqNcEa`8G}BfYptgbc8Y|s2kMo&U!4?Gj?d?7|8hNQp&O=G$^kb^}(^(4LU1qhsKIzassopmnk@Cy?z7i4=BqSuz(-ghZ!VERn z917RXPLwKmXoSY}tSt?Y(kzuktYUp*qomJ{89<>YPdd4NJ<}}MXTy)sA8{&2N0Qe> zxPD8pR7A=b8kw4I{`{FE;dPyrmG$P$YbAYePxfdg$U(6&d*s;=p?0S|O6isk)b?X39f~f{L_JCSf{MUgFRe+y1S!t9xGlno7^VCpGB7s~I1v)( zZk(T2giV`V{u@(4g&ecne;|o5KQ}iwJss00@~2?!JLvBIv~l%eLd=f1`_Lm+y9-?a zm+i;$-kHXrqa`YP-nHN_}MlwC8M4 z?dNc?|3?7*rlzKl2$NcfRWWFi#tXjMxxU2CxICod&QwVyqE=B}N^E%$~ZDP}Kg_{NM5 zxhKw!1o8$jMSXoe)VQlVLv8j&N4LGE#F}#lQSk^2eDAqj7I;kpN-;hWQSjx3RPZuy z@nfra<(sXEO4wy2uz-K@kW`}~P`Chj^zI*4!{FYy8=-r|fFLRlvf8gLIJU<%H~;-L zRSAg3)WjsNg<=Tt7LCOyyFe@BAcHdqmN?BX^Oe!o8odrWKS`t#T=_nf>g$w(w2L1J z@bm9XmT9v*LoH11y~~Lqam}4%k4>WCdFoYfD(Jm63$Vx3*m!8Eloz@p$erI>QML)m zI5dbeJ+NmxpWK;@4$x%WaWN7iaN|qf5wz}$a5XLuFdvB}<3vSVzqT?9o!8@ixZe;5 z5tX4>I86OSAqRqd1!4g@WWTfHx}F}H1(UbRL{Ct{c@#t8&XihZMdpIvM&;)({`~aP zu*xKo2^*S30ucsg*|w3d14)no_46v66IyaC>f9E)6A}_&1ef?C#-YZeYHi;RVUc|R zCx~ro`m;6HAnrU1-AY0W#R|(@@cj+MTHGU(9YKUNV=Giz*JPxYS{8$xoSh|o_m~#s zWAH>4C?7gFv_i>+TX0?IG`6tdZ;!j#WQ-Ib!xsvqs(%>;U%q_V0Ii65hc^`D<*LX6 
z*;TKPFO`!%j*JMHwF1tsw|N)Gty>!NiX}q7->DiwlBpdb2ars$;M$Gl0iE&{=#J$% z=+-JKEBo;|E#>dMv}Y;l?0oY3`;cy_1`YuM20}$cLvig|BK>WkXIlRK`^Q!&rnYs- z+0j5^Q)jbuaekVWm1Svy%O)Uj3D{~BI=tn+NN?|FA{fe&a6xYXI0LA%AI*uCg5UAS z6+sb(iqYpqU(<$fJ%u1#xDw=j0+$aJTwSZ7KT5oFI)TyK+pCmF^kwzj}$9ne543%JWq z65=ZVC$@B<=KLLiQ+SUVf6Fk2cQ(Wl)ag9)m#<%cjopOK`s&iQzrdh(y?_6n zux8o)CNNZxQvnu#d&WoL>GQ$Jr)+>Y6$pgD%kv|De}7o+o)W-DWDc$ta<%*HT0DOI z7&Bb1@GeXO94)Tje(mno)YK3`b@}|3@Zw~%=Bx-3ulU0WHaIKrPk>-pSzki<-!FU0 zDD%{O_s0)dLmk%rQPu=@`O<-R$=5`J`Z!{m4A=^}vT|~qFEVFpEVW1?yAlI}g!*=q@4ij1;jgtdQQ)k$_91qTTkP%ZPL7YQ@und5yJI`9~kG?;1 z285Aim($PjLSUX|zYPC?s{&jLoM2O3o%IbWBm@W$>xO@bN9}g%HKiHmc0%{Oo z4KD?r(kCERI@;Q-?Rj{37CZgWkQX-}*#IdBM_^!}we|AZzXKEmn)*J%>8UB}8JU2; z8mg)fKcsPMaF8kKP{dy&CYEJMS?CUoFLHpMMN5k?Q^VWcol3&f0dT6n|H*ivLNq>g z7)$4Rk`63x(X~b8Sxv4uz=vx;en8p3%aumLdI!2}d?#YSm)a~TE32!w1!_Ax&!AsN zLaIz#ep@=*+Oi{7fR|?{0flXCISXYvySd3gN`Q72x*G3^VpSvr8Y|E9EI`@xw}GeK z`Pz0c)nQO=rdo0j7E*}$kWF2wR&GU24I=}?(>6gaE^Oqku`zdjPuu&@fl~<{gV)Ka zsep_7^H{KpDk>!079=k)X1k@L3F%2Yd6<|&w8|RhoO{C|^57zXCkPun+L}W{TA%zA z`YR$W&45_JS{xq_p^r^SfYwZ??4rMC#C*#1q6<6B>b&Lf(DOph85g^U{`6ibR~*AF z4|D|3FWN;{@a4HU0+1i=j1~Ge1i{0LhuX2FqNhi0m*i^R;Jz%6$O1!axRWJp|1>mI zPFI&Kt_XSufGa>jozc?M(cu#i=%|)rbthLXsd;)HLX^nM%L92-%gQMI1gIFwv;8Pr zbLWG{{I_n+Ei818XlDUqJ2O2EgP0PWGhzpf4lM#?36s%$g??q)^Gi+kTb@7fhPa!? zKmcR zf@4Bbi8`5(l9`EvEaB~Z5XBBFE-7g;4w@b^hxP$j*u}d0(U4~9>IA1T-l=G6u6eZO zDpnO=qmFZ&o|#d;a|aNXk-2$p@C`x16t$Ayv|Nbny?f6hBQbiUQ0SZ35QLWw7kMG! 
zRZ(5-3Ih$fX?t~*jFFL%l$6D{2HK@dJO(7>_w3gwqANsxNB>;=FXL+#adGk9aOicy zu)a{ne43xn%FLX*^T5WY2(o3|hf8r+RcJ~d2Si&+j8T<>FY`Kmj1Bztk)pvt~2fm$RjGK^}=jP{s2Pi{x0_St-4!OPc1Xei; zl9Q7&nkPj|NjWny@hmm<@$c`dy}$h=7rwN(efj#ne7HCL^6nI`>i5e-^SU_nQ#T|8 z`1RzW@B6ER!R4|EuV3@>XYkCr0~rMej>XsIZB;P)Bc|7fOfu)zbD_(`?W|tRIrO7^Sbyr8nl1;nNktCXV{#@YG>}g`J<$;$E3GXjFflQKRg)tI z(qC9un3u!C+&u7T$Nb`6-FP`gYZt1h#vIgD-^VDhdh;N=q>}>ur<) z{sHyuU~3y0$dg6uf*Nx0I5WMvgA$@16+nkBn4m+37GHuAt3IR_3^gho?ee+t;du`!}AMDZFNSeTeLlvelE z&Qfd=ps1rFQH1o+Lrb~Li(DAFo11@&Az}UL+1}H03Aps+&t{9~#yw|e;`_kC{KtWA zao02zZuFm*5*?XHODT{PCpMe5xS}9%)BD>ki_oP1*_=sIWJ#nJE`=qkj)B{^><(f~ zT~GP5XA>!`%3|ga_WYNzerTAJ} zK3-l-0-6W{Nm%yX{W9t3D1-amT2Ry>*8zWcTha+a!qIX5!WXD*4UL4Q+H3iMGlDPv zaf*swqb>V|ZeT1a<~`_g-6hz&KZ*bedHYt8T4}k zyj7OHxHxqa%1j{O+nH~|L-v4PU*)T4kzM^0x3XeG8r$eRC!|9@dVk8M+KD^p(=rCD z@>5`P0hkoM%Ef+d-a#QpOs*7swy#V^mO3zS2MY@e0+v4`zNK+t0?qRMxLfDwLuY5R zX6MZ6>K)K>6m_J_IPd~wbz64!!azN_efzdVE`QwoBTgBa;F)Uku{#Q95eRek-$ z;-ZYWIMkko`po+(&6wjoihh1=EiEl&W%ka_uen&9JJ&rJDrpeel$p}^PF`FEb#-;; zR9&AzHvkj!td6-e~i)t72jF1jJD+%N5|hlWi1XsFz)0*`!B%OSon2g@*8`0U=s1t9SwWqkMG_hy-@}XY8!nUCxK5L(UI^Q3^#q;Q7V2HK`;G3j zcsg3bNJxdVVktLBN3sAA02>3xiAVc0Lln4CoW8ZsYgbTEUbpFNeoAN%%EtNmx#4T0 z&<_^XHytDkLu)rphb@?^{+zMqU|#%Y)Fss?lzI5p77*dp9a9($k&pYzqpgYTn^YLF z3z+bb>S&RDxc_+b-*E89eFNpYkO1vQbJ{=$nVvT2Y>K&dbyD{WdZH!K=gs6YZhQfM z{Qe~=Wv|H)%Qd?#f;#bZWIK(%U_S!0k7V{Gp;8?)`W3J8tW8wlTM@JsK!Sa1b}{wx z5{32zOC)bxc8)$F1e#L)L!vTiq5`R}C-*3fI=rg@IyXPgOn`o0gxGbkqrM0`S~qp- z_Xc&#K5KH>+n=#QG88Lt1GWWza5IJ0oz=c;Pa zo5IdfYIo>z?tc zVy>Tpo>Jxym4}XVQ)5Pmi<)v z;+)h{N-CT``Ae}qIR%9c^*?{2NO>JY!+gQ}13+JMrJ)AkM9RB(t2X=}F92}-&BS=* zrTevwZ-ip%Gf|L`$?-$yo$0uUD>3noiOD#5J5BYvwc?fXLY0oianP^T+m#O}aX za)pNIJmIyQr z&qPc|T(SS4;a#c&`lW_EZuOgLJ452qeJe|)`p{r$L6(Z@(!;=QRnEP!2(sEr?n!L% zR+cJrO&uhhhxol=a`B??o>PquOKBCZXe;W5L~Z4q0w^HR`!&?p&*w|{{Vs+^3n0m{ zz8B@})LXpSsV!#Em;mb1YpLaca301a$?&z;K#j@YP`A`XGC24WQn{|~NWORi-*Bzio^E%#$`lk0lQxevz@Or-Ut?T!L5+rtt;v!i2LCmO z2DzlDsNQy%0zu-6IJI()|NY1ZR7pil9J00wq~QN8c1})A%+n2xj7SBZH1_lax4wC{ 
z4V>I4P{AWvqQK6k$Ml)IN2EC1H{ z1O)OyUxiEjF`9e&&1X^7v4X7ew#JJaK!V^{O3pOuGD5$4Y!(?X8|Jx z?857+DsJ)*Kp)C0C}81JxdRp!QwI4vgU=L<2!00~23sZS8JL80vlA0xMF2)hpyA2O zWALu%H~D`kwQ)BWMaY#%_%kj!@k;Lc8_$nqi=!ZB{Dxff-4VdQ08>;q2%ZyRyN|>V zy}05mx}RiDY-CTi2}mf?Bp6y)RKI#fML{tjC3D8&Ykyigd*W-$2O;#aw28C`XNd{l!xOD;&mJU2IgRcnw+p*i- zpD?4!2l5IUqNlHKT?QZo;9lhU!V?S~swB=8ne$|31t6$GrShZ$+kuf$sZ>z9FR4>C zNM>(%@c-+!tk6`TAkeHjLoW(J30UK*#Ov!nPfwq2a+)Dzl*UJV|NUl(?YEY8h&-WMHONf|pnAXP7+LXa0Hu=7Scs9ZV`jGG`&% zMQF(T40Qz70Rl|7Z_^^OWj?j49mGx~7|&W6k_K0S0sR59({uKF7`jf&E`Ro4 zG&nLjA!J4xJ%JDU=I3bwtnS|jVF+9SnUI<^^QGx;zXH`694C5uY`*Hy0~4WJrT3pL zVlq#&aK|(@fC~3Y=cSOau-muxwarbQ_Bh2v5vnn;ZvaTU@y19`@AKEMbnh26(L?+W z*1G~v#dvwQvfa9SpWeKAvw*AJZiMDS6h;e*(qI#fukEKzIzR(J?WD%F|Cy{K43C>LxM#KP|kj)0z~2ItPT( zAXw(m(Nj}XLn8!z9E2?#ELD2%J$c9{RSz+j_b(V3SyMaoU*E+hA|k@ZHfnVE1pXip zC6frKuk3`?*PVkoW}k_kvTt1g_0c|t^DA@_x@<2rp87NjTdho z2q_t^!T{Fg`&kxY>5S~0JLcSc#DXjhB+}ATQOR9=-~Fe+LLlvTZ)RA1WFSg0J!;|g zVJM4=xq-pI;fzlQKMn$T9{Y7JoOJ=Zf0|f}qspq&>b*Vxxm-6zz>W4xtUz~!Lwgn% zjg%X7$)&GfZ~T1qW4r3MLvmh=xPrO4!<1Y*5vYpSZ?*A?HS=Z_5gd;CpSmq;Qa|QIiL_C0>|h+4#99MBY`9@a;brJ!-t zn0GWo>;k6Od3lE(tO39w^;O9`46y2OExsFMrPI zZXMMbl=^aoM{4gM{08Y67Ah?nSp%5Y9<2`Cl91rH?0z!e_%Q5oAhtU6nE+5PA=ZE_ zQcdMhQdB(LEst02sREH>D=4TR8*5uvH#Ib5{?V~$yetW(@^9$))*@q+y##uI z+wlGS!!|aK4fXA_>Ux60lQ2+DnX4|n+@1kJU1q8JznP`^}2rvE^Mr*{uOKQHpm ztGGCm(NUA?YX60W6NRYv;BPRo4*)3!cpXr@9$tS=Lr2#M5m_Bv`c76&?E}yT{s-H# z4y!m}zkR*t=4S0*T^Fnq6K>$~N#P=If_7#sd7(h=S99~dp3$zh;)qLreKs~GXK%xu zdp{?mFZnm{sbqH?NXw5wW|jy%IRrA-$HSVI zk&JYMZolw9S=Sdkmz$cs*Z47{r1-?ew^mEG*YBjni;btk4q3lHko-g$7N7<1uiw*&4AbfZlxar4m%P?M1LVffsls_C;PM4 zwMCbvFwb8PIyQ`!`%IyMIzs`7oR`f=KWMiA{c@ptu)$;(2_uB`GhS$n8uu7F#ga)^Fe5odskt$Zbo8AE_Qdr^R=s0Di+B3}sW@2TOdXPhn1djOf^{{e zeg}3wK0b9D5d){VRZUIc5WM#25v)$z{rxXlSzO3#%!0>Q?5On%y1}^WbNsk4yK+WcIX3ewB)drt|gujDQo}OB# zC-Wcn(pnb!iGqG@Z19=4d%jO*2E!B))GS?HQ!wa;Vc>m!wEUA6TbAvjID* z+Lns$qeoC=W+$2|ii-vKTpfur?@W3;>ZPFm(xw5#jFc^Ab-*E$7eE)3Dy~X~hyWLfiz+T7r`4 z2zxwPon`C9LJ 
z+OJ=s%gdFyIbvORkxdv*T;`IT$tLX_)RseQfdtBp45?0F>VjubJ%q)sCQj!^G$m zW7B||+Hr-V^~VYkkw`yv&lP{2(ZqPNl3l4-pNf>aySu@R zOs?R?hVXWO{mK2w0gT9Z;sZ^{BJRF_o3R7g9{>$7-|w0mDoQFG=YJ?>-ceIy%+%Q0 zS5fF+m6je*Q(H+vVdkB1>jXW5fL4sxq!D}AW=5E_vf?<&ykp$>8oo2(=9iY|g8}!< zp1hxYTp=rDAH&OfKJcZ}4~#ljb`Ce(M;(FLmsGjpyOlHAEsc#c<9CZbk(cX16;!(Z zT#@Brj0Zj%i?K@EK6r4h2w@tU587PZ3|pV4wwL75-)E$FtnyiO&JGQ6-@MtHv9?7z zz~R0v*V9Z+%u=P&unz*wK1AZ%ckliLp`K=0*iSrEz{?0=lalHzSW>cLdX5}yGTYp7 zOdPap06s?sj0uHXMqtoc|NdHA<@oIbRAjU>Kz!`VUPt_hwP~^G3KZ(-d;$k+R7)~p z3@`tHS=pU3b+etDF?W+kDQZBZfKC!Pt815kY0a%WIs$yLJ8zFH3bDrkn{lWL|)*Q z+XwQ(jG^eE<+GHOI#%|Q(2-^x!=vpaMiLo0yQ<)}D8D*{I&r@f z{C|O9g(_Nj&(-|;BPR?m(^e%Y=j_e%|`wSFhwZUC$>EPp#_Bc{aq}dq)b7W{ z$Ac2=4vt&_cJFH}x!(snE|&YBet&?3T+a=f9^1W5mb#!?Rfg%6{ zw`G-(tBP8vs}DmW0&<#pf;=_%jl!oy9Gs4O_sCy;Mq^-z-`bA+!*jq%n|sxQ`JB6sAkj_=!0;vfwVn;{Eai#0B$2&`I#gpVtikjl$@1QS& zDeIbj$=R82WWKIO95sK~o5i4si0{<9&<}v?;>yZnU-E4viBl0WsC+{`OMH9-r)H7d zt|}g>og`uO<6oc4!K||?8pmY!WcOPa#N);;06SndrkBtd-*eDd$giRdtDurz|7n<~y zx3Ownc`^8*OHbcY34m7*9Q-$LQn@h`#}Fq|tpcc>28Pj@_E?UIMM)@OaCM*Gy#27B zNJhm*jI=PkfNO!2sY^+yC=1$iAVy$+Pro`s@_V((y$$jNrH4nFg9z2HG*7mEUqB@u zo}P{5vsK0)m+}~E3hF-no38Q~qeTi-N}Ao{@Mk1*vf+Z$i7;x*US zy1To-(Lp1T-i+Vc+Dk`Bu1?+g_{Y=n$5{$Y$MT-5B`~pTYeUCGk}-#Jo~z%w13N&j zXx;jagvJmS(Sl+Uuo!g8M<*VwLLDJy0X7%lu3>QZDNsYPspXYL!R@cY_ADuh=qZhb zz8;;5y3=F#wwEuXpAz!>-@wiM_`ay<*Yxz1me$P=A%|4*w7akx!0-dn()u}1PybJV z{|RwmU@y5D)%}D7Ejv5@<`$m~k@#e;R}K%UJ&#;n70^J(QZxUxBpYoiufhZu0aOxj ziSI1k9zH&ZiikMaU1a5=(KVR92&+<61s9R^ty{eY24B8?JLTev(ae6@`2;U1TFQ0B zBek*ub7lUoyg%ln#3P`Rpm8qr{fKD+BEq{Ix~)g9%G}&_;n;C`fk#innJA<1Ny<9Q z*VnJ{PTvhYx5mUgu{o4fj3J)!h1>})Kn5wz^G~!vbuB+=Yj`N*^)v$huI%T@M&LaP zW&ke6FCbueH6{I%x#<^W?HV3K!kx)Mb5qmL{!9~C@cqE=nVn=(@zBwCc+7d2*(b}h zQHId~!B-0%(qL#r-N1lBMMbXV{gMia=~*L5f2Mnwa*4n<(Bej}But9cLkAz*xaCwe zCxnG1lyeVlg9KgF0$c}Rtp+RiQ<|{XuVF7jx2J)+7XSMXYB}(|=yHh}FJAmySqZDb z#JK*)-t>z=?)lubK`2$WfL>!`t#s0s92-U$jQ}?{j?XFzLNPbz)!Kv;E^oYNs~s0T z+}4)L!m3$g_NmK#`QanMZX2d=kud|Oa4SF%gCM>jAM&*G%o8vE@1Q;Ko6Axj^ri;h 
ztNqbo19uSFC${YWeS$c*M2wAt#B5_IeCt*lI3#^5k7X{j z^E`j+MR?BVO}>BdQ(_2iXLxk9d0YOcJpQ=`{^pkR%Ou9SK};p3^Mx)l?Ji>OD0g#B z{tQziquJ-0FENh73o6TFJp*D=9zEhrOh6FWF!)`uO`2Fq$J;#j=Z9!w^ge0<8JYb4 zWzToMVSEc8>xFQ`nI-fmiNAlEs+1&`BBkb@F#9e^u_?lE(8p{O-QV{U6Z2IMQ6Y*Dwg4*`n1g(F zN&7E?&vKZHN^T28#53&z8*BXfcY3-3gBwTCTpG-o`v4D^HK9-&R;OL$&CXa@(Pz8( ztYZE(O4NdK#}wlrNq4ZMNZeFFrlKWd+>Z-1`qMZlSutH>Z z3JDHe4s`6Cv7;rnTy3U-b`X|RBbq5{Q|TEqsTqR_*Gvt>F5kuDe;RWLKg5K0S5&A~ z#%JrR1sFE)1a35;3Y)ZH3kgV^=y^A%=X?chuADC&dyl@@khtV)B=4Moc>RY}Sw-nA{ z@p`1JM5IyAe!Hz?9YKK%@D6}RMQHe>rGvo00qW&7?XHQ%jC5-lt#)4yd#^-rj!MGY zCQ8&F2wf4`2kRrk;^LR<*=~E&e*}o6gYX5oUfDOGAdA^qT4LgZrXcbKYpWvn5>hkYbZ+4-d-XRGEBN}0J zcoCO%xVSn!-TIf=SMeSsHx$W+1x{k|2KwMcFOOPurCg#0nBZ$ul zdmj_&ZV5?BJ^_;=AOtR6-q+>jx4eF(z1lpdJkHkiXEum01Mds+YO`0&l;xDb8^$d^ z_~I5Pr#(nEz#Kt4&r*RMUMs`F+!txY#~`NOzzLi2Nc^R&|&g65^(*;m?gf5fEc5i5bq!$vK0zdTpV{P0}vm?O2Qar@qsp0Npp| zB&-o77%cZ?vfKa`{Zx?Xtru|w>_#of7VP&SO;e3PhlwF<5+{`pW9GGVx`_ba< z#j&kVVULc!w5VrgUwfrT@Aq#8#BDV3{^!KlGb_|oRKRR`t$lbyDUdMlC1Cntm<%?G z*4OP1v#V#d8xUGFre$zOMVN_+1+{rT44ByrXh#Ec-e zmVHaH3)H=N+r6Em#gR#9*J~> za2`+xR~kW3SJcbZp$U+x( zbe_LB{=2vQ6$Ak91D*1?xb(@g+tp^}^K0J-uD$x4h^}Hpuz?b>>1bkX{5&n~Gv09- z4;M8p(QK2`{9uwrG$HrT&C4HjaY3EIm!UnEF5)dm*4EoH`!-e4UoaW}ubiPSkyg;qIt@ceOQiA1#Xxo9|i_Xs_rsMj0TN%8WC(GfB{|+3dr(H%3 z*g2$$2)n0iEKipES5(>Y5ZT!+IJr1I-31{&Z+xQ5V}o}eKW>f}KE%MNW@h%t%34W5 zXGVpA0FeM>IfMI>-%05r~?ku5_+Sbe@L0_InP%Zs2NW7_^lqd`NY6=7jNAIQewEGH;)q*lFt z4QztHF=4ER7y}Sz!x{8gGCbFhdtuwe{_VoH*Svyk-=C#?8ZJ@)qHOR7yi*J$g3{bW z*STW8hO_q&mxs6yfOWOC65W|*gM@W<-;sSC&C`^}$L%W5_K(a$5g%1-`%|Gvv0vyhQaVQMhY$Et_7 zL7*s-mz$dl+m$J!KLxxA4$I&E-D$wi3FF^wXZt+p_(n_HKlMnf07#8D3S&eD$tR_4ltrr^8YesZnMM zPhil{W@I2K6nD6r-9W%7-D%c-SRbouqk>)8Zn8(-QOFDp%W!GwSa|s6JVAg%TkCPI zj0`oNVVv_i8*T_o;()f_kMftSMR80lVFfx(U@NDW@KnL?z_;$zUah-QN+y!%hI#@` zYg30RUTd8FnQ1&fJsrtfYqi11)Ebj`w6a~Ou=RKKlYpTL)+bZ6ZL*yxF3$OU!$GiNA%$;#1R)%n&5JTo&Jos z=WDd==AJ(j0*EtPFVq<+>yJ^GNJ&f#22=lobEt2?+^O;tCQT7yEMj(FY|3BkalcwlGO?Yoca#vJj+ zNR`{?L)y 
zzJHY(CjWRWT?c8Q?N8$pYxTc!ou8NpW$#F=bF6gri*jt)E`A@fRtcXai{ zj@VNPfe{8qSAdr{VilLTTuvwf_92MDhGFoF#xdjZqC5=!7g9_(bOX%qe|}vF=g=DY z;`N5s1z&D^9qlxAN{;@}+agv~wn_K_vh;}fBG?uIJ*;rFH;PoW-mLC?qNbO&@tkLE zWgd-|O3kfVUJWqMP@51H~a8??}f5a3otQRfw&G< zn*q}pBSBpq99ki^z?HY$>+3!3?JIm(EOcx`zQ9icf*@j)PI&rs0G<_5XQ105Kck3*^Az`4vcY z=cgNT#b`sqaK|9af}gO>dz6ohijFFiQ8hS(JCm<93_!A;9>wv5o}5dX(47Nv$9IQZTmss?_WJ3 zzsimd+}x)vEHQg2R;xq9!_dgz2EzpetgjP^5Dor-wLncH4%xbqDeO3BAq9R52>~h; zhUy6jZaLnnPl^#*QPH@VhBUc7`T9gGPOY+Hrx%pZcG zro(1d*fDt-14bY4G6*;oJ{t;7r127w3eid zY9f=Gva+(9!ABTDO#M(euf6>Qo|ORmgZ!5JFfQcCm3+2l;aNE2RYeXW^J(0=Kj}4} zg98~B_4Xb)`oy*3k^fyhpK<*Icz(@nvOhee11vgXu!Sw$^Vikl_i4Nt(!Z>cEmn1O zk69?5{C^Ru1Ra}cK=()Ek18CX^`&Mkz6KHF#JG9i0Zp(Oy-6sqTIMwGUIBoq68~ zQwWPEeZhbQwk1br=eJ=jvvLx-K@c`O6jR+TLv{@yIL`}PYY^J^rE;$E@#(%SqImoN z6?fj@RR7`se^W+;P>4A8%-$g*LS=tYc1Bjn-V|k2c9HDyu_HS>TV(Go(y=-Aitp`n z{eJ&{uj}{6&mUdqa-Hk+_MWf%zMqfBY58h0&;LY$APk~D5|os2kgux{?|D|rYgj!r zJz!vH2%r{D1QiVpKt;Qc5+py~8E(SXUvrie6fF7n&2mwOnbulx1~@jLj8bA^n%9@y zhhU-&o$PA;KcIWC5~AM!q?&yZT9ArWWQdY8Fl17U7|~V#d?oyI0rToMhT?^xPIWS1xFb|p)lbZ(`qb*(+_%b z>-O;wH-ogiT-s$EoGU8q@2L8mT!Ou`Z;mtWA(Jo^}Xm@X-q-Tk`g`D|y zj?MK)iDLcj?P#EntGy3+F6i$|WzjH?aIP$-1ry&Ytk345w zgu5qr8{Py58&^45073|ns^(u{#4xw87#|x$5IsFT$o<`g}SPF{xMa&<^Cl_c%G)R-b>H8uI^sfQ+Cddsylkscl2=8w(*ebaoT_Pc2!zt9o^AC zvudgtWgBl1v});6RueZV>bwt5UjoFc@@!wfDM7Sigw^H&L5;{yH&?Qz?=s%UE7sk~ zBBuLgq}zl)OQ5MWMOW4;hLe64Z=h z3}$;KEuU?K#N|ko1}llReq?5F_>b38+ou}5a_w9or(uf48B{7{L1Qgq^!V{7$k=Mi z861vBuBOr4QkZRV4KJ-b>efoIPZ{6-<600FRz64V8B)NI)Mh3S4@2G4U%@E9M4{Wi zie~@%8e(*I{ew=bSWfp&NXUb60j0ph}Z4vQ=u^B^Q89&AA6r59Ybj8T>%bEy5ggU<3>?MfZ2_seDeo;t`H zwkPhse-)nh=`f|!H8426%jRC-?=VSQg~KWcS3BnGIfHSJ;c5|eof{B3y*rSMQG}c z8~G)dNt;{-PbMgIee3k}Hy7`$*cF$5(c}_S*O0&VqAcf2$UXskr@BZEJ^pL%k|^d) zRCf2Gh0=F&RWiSnhK`sW$=-|ReV_FIDO7}li=$J|!t zCvyiIK<=zd{^_7Ct*E!&OmjvZNpyC0ATNWew`ZmIj(YGZ zzVrN9HFwg^!kOCSgarI|Y?Zj&u{Ln7Rhj*_q25r7=9OSqnPpi}lvTJcg`ID%chFLH zW52A)9Y<36f_|9vS)5lNAD6I*JQ5$DYNGr-lA4u1*4HPq@kyyb$!!aIP?HkJRodG% 
z?D;uZFWYpx11Msv-!@S_(^ipIj(p137B;-J!FnX({>6^Avy;p@QIs0@2Ql5d*1XNA zYnqSxMzLNFIH?x7Ehu~a<(VCs5qEu=9rUHJ_=%SD78fz&50>#pK_MX@BfTFBMC(0& z97HFlmsc56`bdQ}Qfy7}Ed0&{b+DXS(O)-X6>aN+ z*yFP#Tb$m`PQIRYR-G3sA3wHHt%R}som-ROh!`Fo%2G>F-T3<7Zlk)2JK4oGKd_l_ zDK-qENcMnbM)wl3zduAs_Fj+O;son(*HBz%l`h5QNU<(Lo zQ#6&E55z<8Nc<8``LfY|N~Gd`cNs87tZ`kFhYCua8Il9-4L(dPdm2Vtje(gF?_0vm z{o9zv#>NWt%58R}{gUmTrhmz0tqwi=v$-qk&qhkrlWRS8+-n9gxn#)kgk;Ig-=iM! zKwkXrAZ1}0Zm>Il)wQT$*!AK?Rc%DYITX2mhg+n1wz5pJ4+65&)14SNIgg%hSjx8N z6k2o~`kZA=OG|aIhg4IADZULO(aZ z7T5ogWQ^LqOoG6Y4xq~;-^WvbrWILC6G{`hh^B6kIjyX~13fV*DVGN??3VRd)d1A| z`M}cn`1*2+?XlHQ=M}Qn>)R8Bbz_}-FB$3C`}j!CqVec*=lN7@3S~o+?T_3@RJhHC`2v&R0|c?C4ow? zsu$auAO8LI>jDN7CoH5?qe&QjTa!i-N$s}+$FRhP17eS!m zA5xGU)i*wV6EJ34Ukt2tpqZw#neekb`L6qji+uAL2A2K$4 ztF2A{d_O}a<(a9de`C+|34rp~uPfE>(nnKg@OxlGnge8H@Pnx9>z_cR2*5x1$vhcf zTvXnZGZ@#5X$8Yt!qYp0GmV2oLl^o#4_7|s_Zqad$uRQ*Pnefq+;o4x1|*(gSv)q@ zf!+b-7GeUpCKx4DRKi+Kq2vhvVXmnw5N$$Liv_hf4e$UrZ<3Ng%8(d=GA65xotY)` zeD(6{mTR~q3azJzIRWdY~fR1E(LK4jie>@-%zYr+jxjJ)Uqz!4-LFS-c z2Dr)4NMI&{>+k}6CqS^b^xmehZ4AKq5F40@1;2HbuN%}!4T>=g%vTK-_Fz>i%F4>ReOfi2-FtPnt=h+GedBsgR=QHb#yr`E z;oEnR&Q7gQE<7P15lN5&GnD~(Iz!{!U&^RTXhA36^|sH+UO4?#kb!sBKH{WTr0!eQ zy4l4IP>hA+l><}%Di~TmeMC>qIT2YAH}-!oZaRW%o)VOf`{O%gj1C*BsNL>KnX?Bb zPQOY8q^2=kl9J=k_Y=jPmt3aaqqu{j-f2)nGb1iO-V-8$A*GhdaUWFsQ0TIb>bq zah<_nDb}d(-;84jlh5M6U#L0_zFlkZi}a$Zm4zrRAaWNm*oSePoLtI3^o`}Q7sxf? zd1zJgI9yfrxyU43WA|}I*Zv)Zdj;p&jmw@j*U7b>Xj_=`_1J}etQn+m+F|9hdn?=9 z`wDZ#ANnvR*cxgBr>H(IhrukW!acpYvW|%y(!tT3c+mmZdt7 zQC(I^%DmOu_1W030p%~%+P$7w{rPk0J3`hMLXksrd)<>>C~r)!zHyM(#Ha5XN=*T5 z9h0p&gu}w+@-wxu-6;YZaVrBeAL(CN+z=l1I}vvv++igj& z_&m{eU%XEcC8K@gqeWria}nI_c zG&B!853O247D#su>_sOnj5MEb{du|g&%eY-tpS?XJ@JHxsbJ5jX#w-ifbI#)6{;m0 z2ja)c*J*yb#kVgyDWg!KpT%%VUgFL>>d-t?IWnvkq;i>d{A_+h@S5Z$b+S;itu1M5 zrLDzi`T4_}YMC}GM(amBa{jeCo)&h#3#%FDGX-gdR}r=CF0PzS!9i`xZBB0QKgNpz3n8~&e5`R)

mFd zwzqso*E8!D@^wi2k6tg;k6B`60!M=dtd~oL3GRPSb&`9Fe7W>uOmeki|?Q@|nlc8OUz9@H2xrk`Mn*dUm4WtLp| zGn|l%mUEjJuV-5lxSxEJ&UJ9q_t$em^7MbVtgGg!{-~YL4B2j_G!)vd0bE;p`&q>ao*1W&kY-gwbYtX4t>wKc# zW`((~T%TB^&Ynf@dK({?*dEj`$xKkT0AGU0cq)8hf4fOph++O9hO6e{@GF9jlI4tT zu;&>o=4(w&!ePVunwRrR6A`lIl;qF0p??>(G1E)28*SN{Bv}@wHF)jYDt^B}vj=uc z@9f__>qddzuDm8jvPUZKk+}h0fBXrBT^uj9Z*4L1aQR*vrxC~SdfgS_ul&0E{O6{p zf`zbam^jYA!mdRYA!em6UHMg6D^;KW5KvT|)Lz zpPtl@J((!Xtm$;1*z1y^n|Z(30(yl?}k|36v$d$lK@M z1Oh%gIGwzHac;C$~ig)+9x3V?#r`F!F+kWbfUR0`!&GOtse?0c=g0 zCZKPI#1a?RyD9Gn6JujQXctse=nRpkxd%69$T&Nff(r(;M4@I*RR$Ioi$E4k*LrGY zk~3(|Wo6;Jy16-tB|AWPu1~YAW%525ckd=oCWZx5S`RKfkc>X0sK&6B8W`YbqZf zAJ`yxgexkx1L9^Sr03=)Qv4mbe&MRct`Lb~y~ZUC3AKLDu=FCMNU69K_E9 za4x()^a29W9%A57gL9ECg6(nYraTN6FJBB9!Bj)e4M`c~D@d8<)A;^73as(KqJ2(C zn4X#fe=>s=R35l5OG*UhK?aZeu2ldO<@KYYvp$1kV~3b_6ye>kkVOU)0M>Ufn_|7} zMFZnE1_KFndC-Zf{PS@8$qE%Cqhzb)r6uGg#1P0y@RIPOZ~_%s=cQpUS6MOzH)A0H zEFmD^fIh==IiY!<5!46B-0$C)5pcVL0D@0Y&>J93b@j;HTn^+qB_)Vzs?lE^;zOu{ zBje)mkQVx;M{o)%7Dk31_EwaXtc)LmUs~1-hU)PB3JsoAIl8c!3JN#~XvB3uaW*od z`dR45%*+{#!2@{e_6x2GX*_$D1%o0`clT1rDn4bEGjE)W3dNV{{UU)J?u{BRyBvcb zD7E4B)rnP@j3Zm1{DX*oeooHP%8EezPe1g!`14sPd7%Ebv$Pz|&v^9;>s>20^4~|P zs0cuV980G6UKuETq2Pt=BFsH6o%<6pne$3XK`GX_fPpXgU{y_2dYuM_co_G=LjLhx zCZ>BvMl+Cn=aZWaJ@)GdSDX&O|Bvr~3cY5XGgHGP1;%tlMprjZ%mivTxN6}ff0T5m zys9eR>Iqn`K@}9&MXOl^?u&k_G6021NhT*J;S2#k8%`iJFhG!da64;L9<-9MTv)P_ zeF&;IWNo@ew@+Y6KIAr)S3P<4aw@p2dF<1ZnBjbhG! z-fz_b%CZRZIySaSU_zl_f*~ekAVD2=trvDKHaHky2n_=tYO8ekJ+_RaW3kEO`5IR^ z@vW^ne#(c)dlax*yMY{gSl9U9>} Date: Tue, 8 Oct 2019 00:19:22 +0300 Subject: [PATCH 089/167] DROP(plot) jupyter kw, simpler to monkeypath Dot class... to support _repr_svg(), and it works without any flag. 
--- graphkit/plot.py | 57 +++++++++++++++++++++++------------------------ setup.py | 1 - test/test_plot.py | 17 +++++++------- 3 files changed, 36 insertions(+), 39 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index b99a425b..057faf0d 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -19,7 +19,7 @@ class Plotter(object): The purpose is to avoid copying this function & documentation here around. """ - def plot(self, filename=None, show=False, jupyter=None, **kws): + def plot(self, filename=None, show=False, **kws): """ :param str filename: Write diagram into a file. @@ -28,9 +28,6 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): :param show: If it evaluates to true, opens the diagram in a matplotlib window. If it equals `-1`, it plots but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). :param inputs: an optional name list, any nodes in there are plotted as a "house" @@ -52,7 +49,10 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): an optional mapping of nodes --> cluster-names, to group them :return: - A :mod`pydot` instance + A ``pydot.Dot`` instance. + NOTE that the returned instance is monkeypatched to support + direct rendering in *jupyter cells* as SVG. + Note that the `graph` argument is absent - Each Plotter provides its own graph internally; use directly :func:`plot_graph()` to provide @@ -93,10 +93,6 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): wheat arrows broken provides during pruning - :return: - An instance of the :mod`pydot` graph or whatever rendered - (e.g. 
jupyter SVG or matplotlib image) - **Sample code:** >>> from graphkit import compose, operation @@ -115,7 +111,7 @@ def plot(self, filename=None, show=False, jupyter=None, **kws): >>> pipeline.last_plan.plot('plot2.svg', solution=solution); """ dot = self._build_pydot(**kws) - return render_pydot(dot, filename=filename, show=show, jupyter=jupyter) + return render_pydot(dot, filename=filename, show=show) def _build_pydot(self, **kws): raise AssertionError("Must implement that!") @@ -143,6 +139,18 @@ def _report_unmatched_user_props(user_props, kind): log.warning("Unmatched `%s_props`:\n +--%s", kind, unmatched) +def _monkey_patch_for_jupyter(pydot): + # Ensure Dot nstance render in Jupyter + # (see pydot/pydot#220) + if not hasattr(pydot.Dot, "_repr_svg_"): + + def make_svg(self): + return self.create_svg().decode() + + # monkey patch class + pydot.Dot._repr_svg_ = make_svg + + def build_pydot( graph, steps=None, @@ -166,6 +174,8 @@ def build_pydot( from .modifiers import optional from .network import DeleteInstruction, PinInstruction + _monkey_patch_for_jupyter(pydot) + assert graph is not None steps_thickness = 3 @@ -292,7 +302,7 @@ def supported_plot_formats(): return [".%s" % f for f in pydot.Dot().formats] -def render_pydot(dot, filename=None, show=False, jupyter=False): +def render_pydot(dot, filename=None, show=False): """ Plot a *Graphviz* dot in a matplotlib, in file or return it for Jupyter. @@ -305,13 +315,9 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): :param show: If it evaluates to true, opens the diagram in a matplotlib window. If it equals `-1`, it returns the image but does not open the Window. - :param jupyter: - If it evaluates to true, return an SVG suitable to render - in *jupyter notebook cells* (`ipython` must be installed). :return: - the matplotlib image if ``show=-1``, the SVG for Jupyter if ``jupyter=true``, - or `dot`. + the matplotlib image if ``show=-1``, or the `dot`. See :meth:`Plotter.plot()` for sample code. 
""" @@ -328,15 +334,6 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): dot.write(filename, format=ext.lower()[1:]) - ## Return an SVG renderable in jupyter. - # - if jupyter: - # TODO: Alternatively use Plotly https://plot.ly/python/network-graphs/ - # or this https://plot.ly/~empet/14007.embed - from IPython.display import SVG - - return SVG(data=dot.create_svg()) - ## Display graph via matplotlib # if show: @@ -355,17 +352,19 @@ def render_pydot(dot, filename=None, show=False, jupyter=False): return dot -def legend(filename=None, show=None, jupyter=None): +def legend(filename=None, show=None): """Generate a legend for all plots (see Plotter.plot() for args)""" import pydot + _monkey_patch_for_jupyter(pydot) + ## From https://stackoverflow.com/questions/3499056/making-a-legend-key-in-graphviz dot_text = """ digraph { rankdir=LR; subgraph cluster_legend { label="Graphkit Legend"; - + operation [shape=oval]; pipeline [shape=circle]; insteps [penwidth=3 label="in steps"]; @@ -396,7 +395,7 @@ def legend(filename=None, show=None, jupyter=None): } } """ - + dot = pydot.graph_from_dot_data(dot_text)[0] # clus = pydot.Cluster("Graphkit legend", label="Graphkit legend") # dot.add_subgraph(clus) @@ -404,4 +403,4 @@ def legend(filename=None, show=None, jupyter=None): # nodes = dot.Node() # clus.add_node("operation") - return render_pydot(dot, filename=filename, show=show, jupyter=jupyter) + return render_pydot(dot, filename=filename, show=show) diff --git a/setup.py b/setup.py index 4459e176..bf91ab44 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,6 @@ version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) plot_reqs = [ - "ipython; python_version >= '3.5'", # to test jupyter plot. 
"matplotlib", # to test plot "pydot", # to test plot ] diff --git a/test/test_plot.py b/test/test_plot.py index c3cc1e31..37b3c211 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -61,7 +61,7 @@ def test_plot_formats(pipeline, tmp_path): solution = pipeline(inputs, outputs) # The 1st list does not working on my PC, or travis. - # NOTE: maintain the other lists manually from the Exception message. + # NOTE: maintain the other lists manually from the Exception message. failing_formats = ".dia .hpgl .mif .mp .pcl .pic .vtx .xlib".split() # The subsequent format names producing the same dot-file. dupe_formats = [ @@ -150,6 +150,7 @@ def test_plot_write_file(pipeline, tmp_path): assert fpath.exists() assert dot1 + def _check_plt_img(img): assert img is not None assert len(img) > 0 @@ -169,22 +170,20 @@ def test_plot_matpotlib(pipeline, tmp_path): img = pipeline.plot(show=-1) _check_plt_img(img) -@pytest.mark.skipif(sys.version_info < (3, 5), reason="ipython-7+ dropped PY3.4-") + def test_plot_jupyter(pipeline, tmp_path): ## Try returned Jupyter SVG. - dot = pipeline.plot(jupyter=True) - assert "display.SVG" in str(type(dot)) + dot = pipeline.plot() + s = dot._repr_svg_() + assert "SVG" in s + -@pytest.mark.skipif(sys.version_info < (3, 5), reason="ipython-7+ dropped PY3.4-") def test_plot_legend(pipeline, tmp_path): ## Try returned Jupyter SVG. dot = plot.legend() assert dot - + img = plot.legend(show=-1) _check_plt_img(img) - - dot = plot.legend(jupyter=True) - assert "display.SVG" in str(type(dot)) From f787f26f7fb42464b904658b4c78e6a4b02a216e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 12:36:30 +0300 Subject: [PATCH 090/167] FIX(plot): LEGEND mistakes, SVGize, egg graphs, ... + graphop label renames, + DOT shortening. 
--- README.md | 3 +- docs/source/images/GraphkitLegend.png | Bin 39077 -> 0 bytes docs/source/images/GraphkitLegend.svg | 150 ++++++++++++++++++++++++++ docs/source/index.rst | 3 +- graphkit/plot.py | 46 ++++---- 5 files changed, 177 insertions(+), 25 deletions(-) delete mode 100644 docs/source/images/GraphkitLegend.png create mode 100644 docs/source/images/GraphkitLegend.svg diff --git a/README.md b/README.md index cf4536de..21dca0ef 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,7 @@ For debugging, you may plot the workflow with one of these methods: graph.plot(show=True) # open a matplotlib window graph.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg ``` -This is the legend of the diagrams: -![Graphkit Legend](docs/source/images/GraphkitLegend.png "Graphkit Legend") +![Graphkit Legend](docs/source/images/GraphkitLegend.svg "Graphkit Legend") > **NOTE**: For plots, `graphviz` must be in your PATH, and `pydot` & `matplotlib` python packages installed. 
> You may install both when installing *graphkit* with its `plot` extras: diff --git a/docs/source/images/GraphkitLegend.png b/docs/source/images/GraphkitLegend.png deleted file mode 100644 index 3b4d273bdb697fb3de02ce4d6bef27fe1c1bab9e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 39077 zcmce;c|6wL8Z~~a%tKKmV}_KWP{~YYX*6X>#)OCzC1sv7MM$Mgsmvs$!8|1m6w+j@ zND5^N@mpI@=RD{AiqBTSScJ)5#fKo24(*J9UXA&yB%39v=@9(H6A5{iEs?JX@y(no_a7*G}ufIk+ znkz&=BlPT#U#T*+(?32n)3LD3r-=N0xbW~>OL!DdGEIH|Fq^kc#W_@(;xXLG>g(&9 z`i|+p@8?5fXV?G2HOTp2pZWj&QyOhNWN+_ttkP6M+tG2ukDouqMfE2VFI^J#SVlf} zPR5@F&Y05Mv7{ zI(4?I)Oqz~8RyWuN8br<-1x1$kyqs`XJ}|>*{P8(9sALrKQ$U-cm>!Z*0M!N`OJ6( zhlctZaT`}RM02;6pE;F$=@LC}2w9Ju4SzQ3mXVs8nv}G3_5API;?qA5D|wI8EzJE~ zJ~`68oR^ok?BPQp85!dtdF>dUh>Q%uRU8}@Q^~EFI(D5!HuiV3GBN^ZCcbLy*s;TQ z_UFJ0Rj;nm-r8c@dIpMrR@S=x1s3!>cQzltUTAJ%VX@D`A_^~&p1A^RtoOh7GC|&(0(>hj2v8Li z6qw#VL~n8MVDHC|!5=>!qEvl&c&hjN_n4%O2Lv~5Vj-XT#to6Y{Crj(p1D1#V`F3M z8t=Gu&j&fP(k@%JegA%rn3xzjkHHlNhK9xV%^c(3KX8XF=cl3YD0t8zhBXWf*zoej zXMg>8ar%p1c)hJlXAza*ja>oJ(abaBUxFS#){U^kLb1ro%0}@ib9Hodbe6ei*4^gj zKO(es>#A;_Dc;N5j-DGHb}1_>+kW5xH-2JdRpsyBzk5G_4!wV3pbx9G@x(9Y#fbgz zPfDOa_5Hn?ySp@(%#meUTJ^`TuH@rmadUHfU+Kr6yFb6C$fhi*^By1Isjj05*v*X{ID@|jtM-DsTq z^|RRd^(Gt(n;##W20M$%xidS={oug^I%Z}q509;%pI?nc?DV!}8`3c{20eUuW=G7y zhbL{`+_pC@J;II%Ek4$({{H@no9fquNDgV=^*3r}r$&}iut-Kj^XJd6*mZd;y@G;5 zOKWRDWF+I;Qs=Nu2Op%}x<&W=`SW9*p7n|1CTWF*%j0x|w;g?(kmBa*8dFpxMX9=N z-`vyFLs_$C4b~(>Czg+M^C7y9LaW%66dvq%`bQ;sdHK1?cWyYu0U;sOh$nZ%^{F51 zt5@4~Jvu%3{{9w3$H38CiRdU0mwquqmamgPCue3uj+{KkJGW z6pCrdVdkweGOcCqc9beC4;97L)ph#k3)N;94HJ{qyb2x^iiG7O&W#7|2IGV)cnt3Q zarxuNkJisqRcN zX5m$^eqC_T>(`HU9R&v&wY0PzH#ckS*~9YeitI%>6)C9|l=Bxa(jxZ-2M2GL;J7$k zXn2zaSyez%((3ln(VpskMn;-mUS3UiWxrLn-f=Rts5txU>RH2^dqN5dHqo%~Oh2!< z_u^vG&WoGU)6ys;m3Dsb>sw-PZ%-*q8ugkD4G<9#X?gjwro`dJlMkVJc@mVSfkW2T z{y1h!D4Q(G{0Cm7*4EUhXGH&a&b1z=AmeSspF$OHZEL%P;x#_VdhXmgJ%$TzD%veaw{w&3Si6*KW+@f;({-+idx-9sGIR##J@KGX*+rEMuc 
zGj)96aZ^)3cDB>~Sik;(RKL*dY*A7=@7%c)tFmxHQ(K#)I%#R?@Q8@h4t}<_g8nAX z>(87%ZS%g&ogC)F&n}1H_+(;XDVFTz%||jN+!*TjUQ_zjb`$3nd3VL%ygTvrHQ7?s z2A#u)Kd-;Hl8r5J<@!Bo2;_C|7#J9Yldf-ghQEzVP7b%J4fXLz)zi}(9UH60pRMbV z^5sY1W6KjyTbNy{x0M#zdoAkZx3_8%9CqK|-)Ec6%*Y_Agi>fpvfcX!o{T$_B&s{| z4@i5B_Qcz5l#qDhk|~jY;2sTS_`^dJ+cWO&?oGQF$g+jcNA5HrrKnZW{GMy{@7XE4 z;m%^m*9E+f%FA1`4R6Mq)u*SY>+af>kW7`5m>7ahy`Gh&rlGO4skeQmB+m7|sC(F9=p+tn9{`r~8%Q&1y*x}32*RP~XzAm)F zB9e-se{;{(Q5H* zPHe;Ymscc#j1RwAiDeB73!^>x{e8sO*M&_jMCZJX^rfQ zgd1|@3U^dg)acj3O7$H(=&_XnKn4CLvUh$?PBw(I3iaUhDl03yytz$H>FMte>gllt zBI)y6nC0KRd2`XxciZrgBsk{hesxy(oY^7Z*%ZGnJSr+UKcC<+X}|fiD0SAaZtONK z_m-hpHz#eBJN-kLV*MmR6bPdV5Fjog!RQE8dv`ZKstDC+j;X7YQv^^+#@leX$9lxfVOH;dseJk6->&+lSJ{(Vmn%-Kq^X7Qc~HOsZH9ke9Tx(kHHqI z-B-4S+`lj1*Vp&Nr5`UqM@L7Y%uV;lBFKVcW0wQ}oR5nOsE=HoiNb&+)KKQ$U*t7r zivrb)N8^;TrikoG3I7LjiP?pbEy$6KE6a2Ks$xAo+DjR&F8xOSFkGdsy`5e0)OSr+ z*Mb&(vFOT5RUEgLToXxQ5fMS`BhELeXr>YkFH{#KJTw<;d}vg3w6smlGT=VHBP-J_ z;seQR-C_#~39&)KS|@wwRZEM1aj`Td4aq9s`;nC6cFSB;`qJ(YPvISc}mWV7lLR{pZH?1!Vl_sT={xH^-{2fwi4**D7+S+uX z9Uri~qy4d}w%^{CI(Anm54|porCLg}76-y_{?~dpbsds%0{giBv>HpAklSwGKAxX! 
zwwwIL%SM?${Yj4Wb9`{5@SM;;=bpSJzW)F2U#$Nu6j`kBaLa#|?az0V|7pkyi~ld} z*~eg(?V2@D9zWi0Zq9YPq$Car3hCg`;lmj~MEmsh1J0de02Gedo2%&yIjZRJ<}^eGe4XzgmNvjHdx zHSgYWA3l6oU1v5;!E?y^?H#A^bLRptUR?b=RdqSyh`bj{xq71z$usy`WEgU2_*8wF zupUf2aw~v>YEV6jkX$*rxfyxCWdaY{0df$SkCIN1;2oDYY_pT!Rw{7)i2+vD0BXAo zv3#UUG-8hJIqZT$Ladyebokk=E}i=Z9aBk+p&2XlTTmJP_D%Xo+Xj-lWBHWX{_`UG z`Jfo|*@2Y8EX%!nZ>IXz1X44u;NoIH*rnSUZ#a7Ns7u$QHE5Mdp?`StTan*_Z@S&z zXJ|J^BpRy)1O$)-h4g;x*AM%%3%{rK?c29PA*sz{sEuAgK!8)#S1EKke~f#7gMh58 z$)M@qgH+0bjWX6n(7gZfq4nV@x6U_jdeI^{zPb_JQY=Q02U_U}K(`u1pViqh>Ws|H zkQl9he%~k;dBaEx;DXJrA*y<$av_G9dt24(*Q<{lIilW};~}ISRRtiXZ)8N^AzDA9 zLEZnnHw}63?I%xePP=y~UX=^j` z80q2#`YAj6YcuLjNQ_a|h7B9YbCbYAy#s&=0w|>Cjr`|bZ#CFfC+n`gfB!zG+_AMU zU%hf4>(k=IKJjMj#mB`}BWZ#7=oubn+PHCJ=i9do|NTvE@|zSi8OKhZY(S23pPxI; z5!r0ho@1=LcW)Rdm*>}%BY`dHdH>Q`w;Fm;RF;tL3^NG2#bp|Vsp0x z4obOoDW&%tIcsZcciwSYNjgOIsr0;y5CO|~s{w`QR{YtCc9X}CA6I?<@Q@^PkCY>B zgDvUPpRc${**{sOsipO=-_Rz%!NwzZjC2#7_fM?n+UmI0BZbTE*N;z*9eHcszkeSb zqy9Gtg%E_o_Q=}F%4fC#rO+uVDyH|hZ9p+64}`#L%o6<@Jz|6zG`Ne_89A-0wpL?s zgG_v4B0FdzDd!Fe;Ou`rhxvt-3Kad_CxJmpk^0N9~*{P<>6Gd*8>2L~Z#W#0PwdWt_VIG`uRKQxpEdx(FwZrz%emq)_I z@yL;+xg(uL0^{T3YlUh@cKs6u^O4m3u?ZJ1hT(VdTmp)U+-7EG2viDX)v8r%#l>?! 
znxdoJASy~l!G}pJC}3H)ZXKz)D05bba30ZC-7+cUBcHdR!1BwR zKc7wor@zqt@L|Etn@RFl+qH}7nyOz+XQwI160lSSA5|kxpFYjraT6~OIFX*69f4{@ z>W#OzHxRS`wQGFjkz8D`hWQL%FNnQ}=9WFyS4YpI>YK7hq2j_%G%o6rSI+@ zU8%s5CH+NuFQ1((s4)rcE4sQ%*;0E>p_Fo*3~To_+Wp@znHyS8`&bdEgEO(k(gO1;7@alw!cieeEHlJSvNs& zWcVR;l2m+3UjAqt;_Yw*DCj^?TUN}>%s3)kz?P(AMK>#&9z4j4gq!tT&^PnWn?|?8 z>&h=>rChrflaR2guCcKn#XLMVRtHNA3Ix=}G76{%^+=A5pb`Rc=F>dCb~wJfyAc$D z_*i0Qtc=&FH6Rg>Nr45cf?j^_w{OfQCMGy;X=vM!g(i+4KfZ#KGaNOu@9Cv2I9gQ> zbB|dQfX*MB8eyTOr3GiqNJ@ZC#$H0cA&2*(j^YQm*Zn@@wPH=g+}uS9MW$Urs-MBz0J@>i`RbLt(*-SmU}oT0Ohd?PtH;NGcs%s z9sZOse(Bn!po@}>=ab?>#-m)W->95XUbk_jWc$+mBGEv*c&@6hF21iF`70InH~EK^lg z<&i(W42eM)KAJ+5!EyE7|;fq`*M_%k)1X8og?i48QY`o(3df35nm z#tal$*y-G)q9JQ$lq1x<45wE&0uPf;2dz#Bb^ih(*a8Csj}eTne1-!+XERNz-md-Et#V2rmUlxG4@53=c<4J>8SPKea=gyrTUpttUy~q6lsU526>)yOs zV`F0jngfL}4l9_5BX8RfwFY#UeyMM&$$AuZf{ss)bVY;gNw6@}=fjyJ@QhF``}57y z(JYgu0F0Gz`-?&_&?MKz@~ditbFHbZW#!|GLG!v+S-JfEx5rPO)O2)k5NvZ^Kw}>O zdba(I<>DJQYzMqrvUDk{*UT#Q0!s~jeb!x9WYrHu9(Hu@;`wGStK(d9V_iu=E+aty zt@SV5h2{iYL$&%of72;r<%<|jvEY>19DMMFWl>3qMyiTW*;xjX6ZAJW21@(m(59&P za8XfF$sPYl2dDwsBuuhmdP%wWgnFT6`H}kJD|L06%V=r)L7K>U4y|(SdPEP1OXv)A z;^;usvBtqMX5z?h8dg^P=m2Vv3`1fJzkVDTsI97^lpKCm^XT-?;26V9Egc=ZXP0F< zZ`<#@e=ZLv8y%Nt(}SvQ+qPkyb|lUkd;$h3^PUg`Z5)i0NRGoNvB2D%99ux-idGzbXKq{NAHXSdWL2ZUpOr9jW^6#UlRMT!Ar01vf#+&~(}X%t zQwx%^uA*#3%ozMM(u#N7h@pcY`9{L&rg!0<*wP!Lp6xhA`j z?eV>C-^f=t_x1yZsOx0Mp$}cMckf;txa)`VC#IYLF2HewL6gcv(%PrbIvKyPH}1-n zNOX98c*SWHlra?k<$RKC5iR%JtO(d11Q1=9m8X0zG7=p?I7mOztDQdmYp}Co9nEBM zbyXK{>-D&T7FO!Gs~5rySG)E+9<%0AJ;9v)<&tA}!A6l?bE*&{v~9m|ETXxb`$rB| zPb0O0N>XIuks~DC_((UukWNf+dAagM%V~dz9OO(Pm(}87@q~qElWkjH zf@qkfz%*gOX*f7I5>xicfr4l(IQWpzcy)Dk3EokbnI;q-tDzJR9lM@QJ~O+hS{o z1VT8?r`MFZ5$ozYc^#0ktWgt4ib+BmNg+`aliZ!5c7-u!8?$a;P?vmI0#JQdrkV(QC!7HBW8X{)b`SZQhXxSe>HoAXre||sIP2@p~N?(L`I{-Hu zIk~Jt+6gmS2GH-wQRJ3&t$Oy1`N@+fORncB03?&HFh%J!8&Ke;35FnWKTngTm!3R% zGCVSJj zS1Kq|9!0lnkYIvi{y+(1-Nc+D2OoIQgN04rY|lWNTaYRe2ktTfx2fyowhndV6J<{T 
z$d@!-Bph5^qL8)ddGpP8?A_Z40_48RA$FU?hiQQLmeJ6VFuo5-CoeDW7!Jaz$x^3R zG=2-eWj9$qdVFboq!s&}qTtDJb8l*BY3YRG?2A;@SRnfTnh?e%*K^;3`?h}nNS@_; zcVxS&U-a4q)t=6KTMix4#N+5+Uh~D~nPm6c=@H`=qYG-_Cq_SA6v9FXdu;uajh5tS zR?ErCu0}7R4KQQl=opUewIj4>OIkon>k)Q*&5hPB2Z$ECpWXu{9(+DydZsZW_5nn% zHn=o2Y+j|VkPtRPuoLn~IATB2RX+H(RQ#IKX~E)haEGQ9XXSAUX@DOCUK&ecQ%HYK z@;3-Hz2V5%*s!ZtdC~q#?0b;24ode@T9&E=(F@YO&DYA<%c%N8rw*r^K6x6&UF|n3 z$n*Zeii?uhfVCo+y?n_AMYbNb%pKSP(XBpcT6!7T1t&Zh z(4hy!0_63^rVHysVvIjs1;>cI9(wog-4^{p85}(#N~o$@A@pkS&{Bt=a(?{uX@|>5 zM?N=Tg!Gw7moDM2LQRh!_drx=K;g1+bCWDCEGk+7UDNcQ>(%XFaze49H8t;f?zw%a zD*n(s`g3Y!y1C8{Jc(X6-+nsz2Mo#zrFDeN099B}x=1Sz9mpVA{y2wUj?)Ox;=^e+oI<5gUw%uoc?#|!HHS(r}c5>3Ewf*fX zlb8ZchukD}ot)C4oNVi6{*t1J7jHU?e0f7zcxz_YEOMpo27&BR_l~{`1#Mo{+R6&H zAP`*DKK;R^ZEbBNBTJayp@VE%j|3eYW3-MPnhGj2s6sXchVm=`%9ge^O;gk47X6$% zILtk{g%$A|IGUTAYeCjP!ym}&V?aX|6vDW=4`H{@VCED}K9mJ8ZG;-`t)aIDxFM-n zL|(D#O#4Dwj=@8>6jS3U&Ng1ZZx-hm2VSJ#yt;AomMtMMi^)neao734G4Lh5Lqq4l zXz_J*b)jckB}kKaL035N;X_4GH6iTc`>i4H;Nu@hTQG(y1|Uzz8#12>jud^)3I*-N zL^N}~wP8CYG@u*DDxaCOZo8$wL?rh<1mbYJ#u$)QrH2mK*a#4d3mRXl3&z*bf7_vU znf|C8GyOcz{#tB)m)V>9>~Ae^P9CNk|EW2i8OAAnR*r#@QT&e2pVRqPtlau+Zw)o6 zXYgRy^p2I*hApo}qYRFArNR^1XqX`o3ynwy#LWS&M%0ZVzM-KZ(EfaboS1|}9agUg z{*eC0*qx=4PS9s;s;8G$%zjsdDM#1#%DsECp-gKG1iqQvEH=1AT0Hb9z{W`~J9Txd zfT+Z+bDqFU5(R$I_Q^C=vb@u+7WbUejSpkeM;@Pce0N4K4@#wD$lV8Hoz@tr@;ncC+LB810{6?qTM0L-ZXGXGQKJ8eB{VEWO6~- zd$y8q3g|z-NR@QY2Jba6_wNv8YqE@acW0GZj6z#`3K!#py zBT*Q|bKi-rLz)28JUj4XWJnttIu2ePDjZ6WGF3X%b$9Gg>#pztOd|#=VPWAaXqrS7&dn81NlAgI&eYSM)$l)FsN*2PYyn^gVG?8E zSF!j$z{8vgjPpGd{GBPsRcMe{Y zKJZp9C%oA5^2-^q5wE$<3bC3il^35r-IYCQ_+i16DSSA@M4X47CkdF zL0!FHzR&|55wq6zFRA%eVT}h)ck_>YEfIXv*;xhLZTk-3UF;EmI-eAr#N$Ff@X=j$HY7vGZ09s3<5o%Q+9 zE%rKg(<}O%)Ge3%Q@Jg(*rR;To+aL)>=%{?ifyUqr~8-Bp(!E2f$02zQ0Z>n6)Pg^ ziH`%I0ZyHz5>^#FTMj?1LZxQ~6$MK|QeFV0J9<#3c~+Ht7dIVL2W^6%4uvfuI4rCd z@kJVf9OGOmmrkj{p&>z4RnmHC!UDk|VM>8_29`090&6J#(sm7L*xr_oj!;NWpPya| znx6K8DT@}RP8~hN{nRixg48uM$a~`q*>>cak(M1Ao!GbIpCeYTC&G%8lT+eLE8`@X 
zG)n{BPdB(Y_1_X~@OIMsn)|4K%x*S^PJ;8|ceI~uv0=>I`+I%PJ~tJ8cG1e0e)a0r zPp>MjJ~KHidS9vH&Wrs$wf*^zMq;NX%v$GXXBN;r7<|#ueR%4ddgV3VG_$=5sfliL z#bcCpMdn_knsU*PUzi*Frh9>M8b&R8PSS?;Ui@zq)I`)M|hJ6lHuMVY@U-?W+h zXmkGJ1@RJQN~g~t(@B1XAXSdN4Q{Frs?%@ER9vd08Q2tNQ&;6$W({MTG6$%^opZgNmlc3;^L<1?pY#llzj5NBw-PiJwR{+27G}VRmdpC*ha&~gE zf#rf2OwRiFAfIf8F0vgaBCy<8FIv>CKnPGm$Y|W#T((1b-k)cxznR+{WI}Byv&Px8 zN?;DG-P~gTZ>G3In_;vxHSIjJ<&T$bEOT*N;X#9M?U9!>1X$Qjcak#=t4v|b>j@vy ziy#qMlzW#N%;Y`+^LBl|)TS578;|lB*O^N0cJfJdQ1aEYVh^D1f2r&{XWHlf;+nGf zqu9N?;#s0#n9!^=NuzOrs4;maR?_)V;=u2*Vjtfa2!%V8(B&uTsj4$DJ`9n}dzSUK z@n`U=yf;I!`g;lwyEH#}Qk8ABT3Gl--{6-OlWgcJHP*9|tUJ6H?B#9E0i@-DD{9<$7UywLKL?~IvG)}Q0mXmn`&&F{3Q==ABF=xUrQ7YbqS80 zc}Fd-mO~4rmt?cKRVdW%8|e04L(PV@!qZBq*r(>APg>sii1)A%&B@xk%8k9n-pkeJ z&3@}2wRqsY8ib0YOJ~}l)cxfTI!k@88I@Q)@TMJ;G_ZCR{AFoubSuTPk)8dczLm~O z$R*kTS#cZ>-xKtOV112dPyb9!PFU46F$WhAj9R>P_4SD-x%^E+7=3(vRsmWAj1Ysy z#fz_X96*295XBudmkmVoqT=E>)8&S5Uu51E6&K&>DIc(y_RgK-zoxvuwTrJL)w^pT zY9iM7l27?=oG&<$4G!p~J%AD9yq5)-(SZ1bZqUs=fNDsP#3>AJDkwkyXm+FTkk{Gj@Z}-;KI_%TmU-#C&JL&Xr zXS%Zc{Bh1lQ&;N>87-PWgfj9P6Ck8N(Lp5GxoV1QXT6_HbdBM1EchsA`$|Hu2 zGdFKNt?1ehnGCdoo+pi$=XKzOF*y|#%QWFnAiju!fcUt9r2uo`n772DJoh>?>oXyZ z6OCRd-d1{aXyEW}QI4gB1-by6+bz^@8_i~C=7o1oAF69uUtS_9x~}}=i#PJ8JI{RN zsqh#MU{;)0pDrIfelPuoIqm6RlI{z=C8s6_DkaXFpQH{k3F&hvemVO{VyA^V8xs@L zvahL&hg3`_YgN>NJC0OfS7FlED@{e~oemrg-n^)8GC7%(m}*wpcg>NRQ;YNhL8un0gQ;);NM5 ztE#iJvyErsY29M~y=0SzMUHwVs~xz+wasP2n4F-XI@ap~42meJR22BvqT#axP$mwa zs)wohsR~_PzJ3Yq4qE4;q7uC3x+_zM_6W08w^UJD_h!x3Yl>fJK=q%lwtHANsk1lL z?!t>oS#OIUjeB;Tat|C>zGo7H#0l*>0`#NTV;p*_Mf~~HwK5_<0tb|Pr`1`$jJIk*#DTXe7qUR+E}9WUC%G$ecRhWD8> z#O#UIz8+NK`Sa)fA#HATY@vX0($&+eK?RJ%0EV-(Gm-URBJFxItMu}f)w3k;GdluT zOIjMeWn+aoNek2{d_HEPzm4r7-`V@Jj^0<HA z2JJIdm11f>I5N@KUb8K;LaM3@K|cG1!8b=Vf{|tK3Ttus=G|9l=rH#Co={dt*oW0g zj@@rL!6s|?`clzMd<&1tvzX?87|ffyKuam(;tEI}*ikgw*Z!h_-$CAPG9$Y4_E%1e zhH(Ivd~eUzn>uHfrO<^2l_;jncvipaT&=sixlSS02_9m<<-bp`Hou;=(Or-{HafEQ 
z(OENpyjYp%0PEX3M;OX!F6-`jR`u-Na`@aLW4=9mGJNmA&5q8~JzL+JZ*5E#PgbK@OScW&PfB*QCtrU5uv5O_2&83A|EI_eMLgWp&9aHB**s9}uZ zS7p)A&>+wjjxhYA5yqkg5KD|CXyP@@%+?^Slilq3^eF^~iFgNKKz4jzCJmDT$YcND z;AMmdg~UswEg0Ph$xfydh^qtg&?rK+7qclu0FS?LVGJZzAGk2$LW19`CC{v6b86$4 zS2r;Pa-5hG;0M7Bhn&~wT0)?qTM5?`^!WCcee>p*mozPrKMN~nh(%W$ZB^Dw9nafH zf^g(n_f!QKI929Jb8~W<4#~~DFXn?!JJlDkf?uT$qKh@iMlizVuU_MWHOuh9n>ycB zas3n8UDHZmWf;P#;gQOoF#G@?gbfseb+M$QMP=i{V)4s;?N8XYsabLV{OlWJDBqYC zA*BfbWprYq9$KSbL78LQt++ae+^nomw2j;LP0r@zsfg>TsZqi5Em7?93Y~omdI5%S zI*vm_L#Cg9DTd0OrQ+S_nLUo`FV$JG#kT)~yNVps`WM~7x8s-0TPRUal*%0Yew_KA}%}9fA*N%V6avP{Rtx6y2sRu9-a zu0F7R%IT1OXYV5G#cnq-)Lw+P5=^a1X@pnY^tTB%m6xE@sOApH&7a3)44Dx_MAbkK zQY~#}AoLiLjb7gB9sCZE3}Kd~fir$7`2F;LI#+mmAVbvxab&%mzZqi|7DEt$&(rz+gLecX zwZCK%+3iu}{9#JHF+KL!OwDd4t_7hzsVdBirk};L)=CQ(pIuEb{jpQ07TQGA#qF8x zdfIQV65pTak0uSet$6N%_`tFMoe&tH=}=X|M79hDt7j22q^F$CSFT_Kukp?C0p>Jh`|&Gj-EIB z3C4faP~$PjBmU^yUfvR(uZ4KwD&VC-PUHBToFT=M18yH4P-7MhuSTDcbDZw?JgPJa zm_65&M2kOs_)y;PB6RAogNlwgO5|0T;j!(@^o&oGi*gj2m+IL*gy^s*MNVJ* zaIGq>x&uE$?rsOuD8lXP&rxZ<&tgJ*NTvrT~O@ikUxKeEBjL zYElelLG)ZyF*rzsDl|<2@$C}CMh2d&x4&PgxE$s}vOAD=m+6TxF)}h*u&{(aGB`NS z`Ga?lRhg*Ab#uU6UaLJb)L-w?Ry8)#LS$a4z_z&Y^xKO9r73N&u#wpZ0;N!pFqp)L zP*}N{tvm@F8i-X80BMkRh<3E-?KIAM^Y*Qvl+?aK&haEHIB4S&q3-gWYt|4iHhP1& z&Ux@eWFQwA;?~$#uAPyl5`@k{TNDJ(pWg7RSFdUTeF1_6_9@t`;pFU5-?!qy#2|em zJ)^Y4n8?S^<*O9-=X=Zz%a~4G2^|gRgc`>MhKTj=qieR{(cvnt#cUxClh@3+=+kG< z(D()*v!-XZ>mj)jvERJ(NC?8P>cxxYz*lhCg|@WjC_!!_zzwUQu9Np zQkyr|0HoX4+pjBDMvX^@tqIBPesGJ<1K5te#|No!>=1i9hMnPofhF4NtJ@Q(PX{rS zv|2?)rPF6xNr5>z#&1bsl8>Nqc-i!!vZwPaH7aN5%V#GwOs9{ch#UW77#3bz&Dh{Z z77F7tb;EB;#y~ICA~hYj>(U!_SI+}V8fJ-Vz?DMBXv74MG^X@mW9Ei2{?>ten;h1d zu%hQ9%|86=FeK)4a&nTo07-0Qm=Oj5IGm+y>*%0;Gn_A&!5COIlpGsa&oPx%S>gvh zyzKZV2Eqtze~{w@?Y4}aBXrw07pbS8)-!Qct~e#g$H<%8@Z@<@?F;c!)wf@-TGky^ zL;BMGl9edm>NH>u3nY)95O=XAX@}}5Q1|YF?6h0;ZG*p zkhvjtQW2vPEY58YCbp_x1UErUR?x37+e{&@DzNktoPJB-6m)rC#tMIv26C$KB$3FabA87Jxy*T0xX+_5ZE>i<;$d=C9X`uCU+}+xRWxMZ(I#&f%`L!eT=fe#RUE%K 
z!?m$7zi-+lH?vtVf0E1H09DBJ;WxUs<0*lOSAI(mJY4AgUOo#>gqX`)7fMczxXX7f z@K;%R>g@2?C!8Ue9UZ5sARjWAbY8w8is{PXEiS|2VMp!nHuz(}wA(&-cAqL7=x?=qIP(2#h2?8czk=A@sfqils}Y!@ zbRlA#s&+tYz>$P7|L}nqxgr#@1({@pP+#1i9gOaf2;pGZITbuM?ApDX(4y#@6cjHW z-c1@Lcv(_shveHU0Dy_541K1pj=nkE0#y(>;0-Ao@HH_o2$XT|s6xksxWV*R{;O`f zh^Q!FY9TVEfFHyZ)8PHr#M=+C0@I-n=;#x7BKNL1J;Ow1ostd)Uej<;8~UlLnG&vb zTHfvatVsO#n|78}g+;?-d=-|(OIM2Q(YEc|2y==CsJ-Doo|Ws{SKYRr!vNXQqT`?R zgx9U>EqS3*i?l~f3}n6wiJHfA?lJI9oP{1pC_=~$p%CHhK#D;0&h2DP?0Ps- z!H7K{et+L^c!Q@oh%mH96E*7V%dY{}3dqTEVvtt{xh{3Kd@UWZwP{Cj1^{07L`qg{ zhu}iD!?_zCzMhehSo~%+s5Q(zZP>Wc=es8kTqg$hp)NEsau%w=AWCrka0nwA>uwwR zYWMiee3ARrsCCWvdY+ei6bp$x%IItC6i}qkez7o`^!3p2`K#4)lB;|67_e2$&o~48 z84ZUo0*34UZOhcW`qR;icTxONv$z7 zpENfY`^=t{FfHmyJi%G{^SR>aj~_&j9UdNrv$*B;`drNw%fNZ{lBpL{Q_kn9e!rsM zkc>*r$U$le7Z+D)f=byr^j8WxYn*^#|9;Dz3BEUJD+Dd^PT81=`rWcil^bLmFZ{eEPg zAIXAjJ-O}yivku&@w$O1%+S;a4!l$Nr4zT7j-)?QEk{R3^=1!0J}*GzX;_%*VzOUi z!)UXO&Jo`VJYU0dp79XUmZ?=?46rUHBVKRkP6Abk_drZ+D6vo$`5*Rx7EErS>|y39 zhr^&Fia05eHsxRvAgUX5sfy#5@LWW)ARaq-N}UDlKNy;upF^G&5*A*DK+|w_<#SQI zTifINq2*on%C2Ikr4gFHBn^y=^~^b9Qh%+Sjn(XLi7@@RdR6)r9}}i2z5s|Hqcv+t zZPw>CxXIYCFz&JnM5!FEaEQf7*w!O$WLoK%yE_>?2i=3ux3&+GDOcBYd5iq;Jx|$K0<(0Vo*Vd5rbhlu@Q`iPmeY7lXZp@ z-pFYUZa=_;(NdhgT3j3u4K0(*1no04yl~>}#hrS3IP*&YP)W7{{bG{6ok{7w(^t?b zW4&SgHE-USJ>T&q6^E91L591^xo?--=9 zA@v*speB47!u-6hFqzmv6(l?n_6GcodG_a#W<(H<^a>0FL7*nrJRpG)*E!mDGF<|E zgEW%{JEZ6F;L?ogq1?hzG$(zC8<>1oD(<=Zqg97r0P`I1m2HP2S|4NR0H9_A2nHcl z9Kz&W&Hb}hH}o^LLHPzCh>0;4=Il&}z{DpDyxNK)b!4}*0NoPw9AqvO)KD*;k&HvR zc2`7`_yUM5^eNHv^zT@M@h~*|>MDxFR?6ovp2{H<(=% z?*tqTyF2J;N3}DOqb1L3@9^V756Mqdn+}!-F^;`<7nrBnRM)Mk_jV~}7}6q-I*bo} zpamYr(67+VOJqR>1v#Q}#iXTK(Z92YbdVK-O_)M@0OV-mw}vMv2qg!&IRJuRZCl$a zTzycQR07)##xr&I>}kLY5|bDB5#zb*H&yRf?}bHapi$DZa69KZ!;sV+e$xG67aAZU{3z^<5|p1XTm4n z(%v3&_wLDhkLB4_)Zv=g)#$wyZ=4c&kD66#F%wGqos~Ff@JK zu!q{j(lQ1{TLBRfG7B%q@5ACZKe75;B)^8BzD_PjL(v^6wb^YOTN|D(+jyC~rmc?d 
z*_AbE5x)KAO{PY3oKuEd;!-Y0%!;ll^O0pXaeH4|dZ4My{VU&(ABzjZlo;~Cpbn$abw4j<`;Nv_TvtVij*RQ|XR|!ux(bI7t>t_ji-)Me^ z+|h!mPT+JK2DL!!^O^t#o2_{BBC0h9T9X0EL(6j^$kR&4|y zRF6sMW1pX{Nr4BJOw=jvPb5!>W?;{rJs58Zfdy8lK>R+~1B?5|sR=WR&y_m2kl2h) zU*z_J^b?DnB`W5Zw}u4x3JuD*(v@q!ZF$um;A+RawZn-L?akHF+OdS<=%}0as;K(a zl%$q#^@X3|@h>-j99W$(?Jjzp?~}lQJ|CaT6P@RoOFk++)i`d>vAJa4^5%`hK6f_D zXq|C(HZ>~^!)y_{A!|FkAlw?$)@S)Aj_vCMi&H@9u9cBthr?0}LnQD$g<)6DEg30E z#U%yl*RL-l12E0=Cxj2e&n@QuQ#d*N_>0fbdCV1%D@XdSsF0_Ih>9WFREwTxPoQmVLpw?) z{c38~hx9hCi;YWNIK<)x6=tiqcid=`)~AMBR+TD9WNC$N6lEWuLDk>V+qP)~|F&dk zF~WiuD0_1eoo5`$jR=%pVDP0vUPqq>+X9QNj=;LZK$kFhtROcnplLtduO@#5m-RqA z2q)*4H^fq1FfNTeG5$7d;KGFq?(h}sonQp#$c^Lw`03NUIH!#so}R?QNb&s)oz0fCOf&PS+fm4d`McwDPOM=|_L)Hs?mTjZHJZUtD0G@A74S-hMzetu5 z=uWQAI#P{Wt8jmdCt*c~H;8=#%{M*w^Tk-x`qV0P`NRJi0Mv`c;HxFl zV8oX?rbxSdrVf%0u)EvpzrP?Ba027f@yk`FDVVp0Z$|=ys+QhClHR3gf%?ESq0bAr zaRWiRD52nAMoVPOFs6-Ej~FKC17C4Z$jBfE za===^8MMH)&~58tG82e9uj|Z+4NP@_J_-MN3o^%8nukhC`fk`Vf`HT`cy+SDAT2e> zUIp2`4ogxW^T#Wtsd@dlT$pqm9nEsSD8f%ehi-ZOx>1MUNBE?02zo&#blT*^0pYi_ zSWo?DM)sAdy8v6$$EFK$@B{wS^8q^B>KK@U#sNo3ct0}M3#<9QVckWHjem)e`(9Xm zeto(~rciF_XP&=$l^Hr>v5;sA$U18%>hN6=SkZEQ$I9ZxD}cyKWJ_Q-H7DVOszP%K z=|v6KsKDb;7js>TOmI*wrAdQNj||Piwjg>^_)pXb3MP`sTavuRfdxlY3I&e9hWc~t z`%DVoO;xrBp2~h(M`PZWsx|&=MU-dXa)UXJNJMX|f7T`)9bMwocahgC8 zM9?7jSRikcd5*~Xzn`%*7@v`PGh8-A;k)P7t&MKS9iF^mq4pevxef8Zf4s1F6eb^X zZxjSC+s41oo3jP;t_D(YKon1CWz z;}pS_Nk+afax-1IFsllhG=leAG+2amWLa|U5~j1s*_{6MOYPv*gMa8S1bwpURRIGw z6o(r0ykOXZpo-LDG85H6!_x9vXKM~xD7(RT_au3Ecwk};20vtt@`~}v6^j2JWEaeO zi4lXdQy)$dy%!a-2m`Vtn0sqNAXi`O9J3rm9_oZAAAWW+C{7pX51y(AzPrJW{41S{ zcX5!@JIGHynL0i#Sx=DbK@f@h3CV&HH*&cbs(A>fCl54alA3VC1Er>OIBC61hMPgSGH^(ev{1?z*P52Jb<-88UDTzOP*`3Al>P43m*p zR9$>z;#kD=G9w}@beBj_P>@Fcp?|WZV1{g_z)6Z=P6hH0xw*^I(vloBm`IC2vXZF) zYHDh7!5U1}1bIU|B>)ou$n8-DV1=-VNFO}nQrcpduIMz}QR-4`*RH=GHAg@K$y)$y8-ICDeDN4v)J$A4V!lun3Aiet5?V^M2QI%QRLwR>k>O!`*g%huJV z)JoHt3EwP*KYjAo9A2U4f{6~;ir5rsls~~>xqJ8SV!Jl6EJ@CR&!77~oa$!B!zhBr 
zLM;|rd>HqipLc^3!aqcdM(dBZn4nG6Sf*?v1{}F!yLdW`kdo2-grttEt(u!QZR*5@ z0FucBy<~^)xX`7e-LSOSuEh{0N>A4nH(sueO0`iNuZO)DC~pIKpCbg40r?!-yoOA8 z;))OWp~kw6U0*<-0PD_1Cn zZZNt*?$*i4!civEI=e(_(OAa;Jz!FA{OjxGj2sf=9v1z|`ED!rM7^x@dRb)dPMrPX z>pU|FFFuyP7=jSKfIxZ!Z!9@V|F*incc~5GdFfkz?qh=iB<-U!Mju|v10dtej zw%FI>KH2@JL$7Y_{Ql`Y0MQ`1Wp~V8xpIXJn*i^@_0@oZ4uK0zE1(KM5UN4C2ULVbgLq9N zUGxqC{1Jf{y)YRjgwzztyJIVOM_jnI9jyc54ajGKU=n~t-PyXBdvu!?pAXjp1)wn) zxR9qs)SxnVA;fTfjJl-${{3W9NDz}g5C!gP5tUtzh^JmoX6^#<89% zDq_bTDWAI58MkT=dYlz2R}y}hxJVRF4);U{{28}+GF^T^P(J6|64QfIE`E%GYmc)n z@h1b()YL?({s$GnGcbm8L+*zmo|Wy}DcF-$eWt~zt8n0eeK&a|K8v)ROYX^uVTzX_38jhP;Ss~btzWjS#HRL8FFvixOo+P>u z?OjuI+H_0sjfHQBe1f)glecq0vQfT?q$S@aAJVz=o)58DEE!!oab z*>AiFDMenO?GC~zfaD+WNIw7VL0jvNlG&%I#cy+!8q#*0 z)Py<#FilVwcodcAIXV4pdAUmpXC4M=$XZ66APc;AdqX5^iQ^9>w>&Th^(l> zc`Y%NuY}Df$LhzALjX*P0|9V&5F5_GY$tCq{O@5PA!A!JLM3P_axf81HQb!hY#deV z!*Ci6>Wxl|4!BP&sNv!Af)ZJ1qsMYY5U7ck5eLq^g&7*yNJxv63jRs2s<`O(?_u%( zL|>p103T0Sgk-#X_W^Y;DluS=#NL{Kz4#vZdH?mRM}Dgz$*f5PU|}KYGf+Wdk$}Fj zeV`+Ut2In@QM8~PDB*2?Iz;He2l8>nJT@bj(Hrvi0+#{7x_$PZ0YtwFE%)EE{&Zy$ z6LZJCKzj7fFz-`PdvGkW7TWy{+qQZ9JtnXuKESSlm@q+&NuXqW0l2>0g)p}EmWnS7 zc8EJMYSGB(XrN$Au>ta7>PAWrqy{p9M^+%F;9SrvOk_ZCBo1d!xpj!&79r3#dYtOJ zV2O>n49N_^!*PmX%egaow7YgumA5AULJFL-ZOcQQPl$-nZWX=T99#66(dQkCoo7Ko zt$;dGb4`-P8k)GO^J4U)2DxY=)j~Q@@x^g8XS%wguQYx1SJ?Z8=GM@Gz`zRamM1J`0EeQGQl{44P%f$`18 zgUcGS^@QyvxQLzwDPSA844ltcvJn+t?S2L#n<8lAV`D#j|4s*+7Xq0P(gTe{tpIUS zL!#11kqI3)5K>8a>cHF0$eziwY=1SMO^Q7HoCx(#8oL27x&d4W$AT-UIUkU7T_a(j zYyMRi^%V7-&x}Tzozil>@mXs4*+P!07x(Kg9zIfe57@Dy`R4^2rspTuw>5-XH9H!1 z8gYjPg{X8Vhu=`s`s^}ngd~~jpdbEE-=wOX$5|f_%O4^@pGg1O&!5@VHMH&xokE2J z@#+^YyadLeR~U2DCXH9JI~{d0EzGz4jcXu6!8U!5%}%OMKGR0cL%sA_@+y>j%~2)h z(wn5WFYOP6s7>bC;U&P^*mGxkIEnr09~wY46__UBc>{Wd4dWf? 
z7!OlOaEAdWf;%9;j?81WJd%ANWzIUrRSeb?Os`b5tfEqf>%VQ@yycTYfzr?1=NxXQ z#@MiJ??EgLb-o^Q|Eb>*-XS}#Kfy|ewn(3V=b|=8&_4e-aQbrXJRo`?%2$YgI8L7e zIAm+TTNg{mbD9xm4*Fo*Zkn5a`eS-av|z_(`*>lM#f>4_qZ~&R+%90Ed24S92oz$=i^qNQ=1s~_lW}fZ%tc*U732yufL`|y z?JK%si$lfpT02cl>%S{U(Z*+zlLkTr!uLTcPEMMd@vm>?=h7ilA&nl~C40ToxVhm@ zKrFAIphL%oC-!rZu~yOX4a*ZQrmn@`l!RD?J}9IoQ))xY^WZ>|MI6E!VZL>`Cbi^_<64>vJ6_5LiJp>(>8dINX0^`yq*A z9}*)2wq9Ax^mAu_8_+hZBEs|0dW~p-gL?O*9o>r$`)O2LMyXC89#76C4`s<7aMTsv>!uB(m6OEw&d@R#cYh;% z-`Ob}0Bf zz}`AhxlVj;=x$lWne{C|_*|GpyCcBNtpLD_Voh^{tALR~T*duGV<6-%u%52vG5m}| z$rZtkh9K~VbaFL0d;P3Kr{s~jIbP55QELcDN7kc@?X;Al3(3JrROX3^8oMSkGgimn z@eFNmD~wP1DbQrU7v3r-Hn{GeMc_Jp7+g1oRjj}-V6L}@;tFYeqFx}7Hq1p59;>{VT6~VNbpl{y;jkx8#?1e98-!@!7?(H>0#R zyW&;oZv>vdt`o3D%|_97$=*+FAFUV8yUv}e<8(T-b4F_FT6i?&c&Sa^Puaeuygn3P6ZA%R~!m+3zp3v zT-qhSF|*+!jZlph&9ujn7Lz{Ju*5)*=AZq>dj>nT_lPU9ecq{w1>(5&nPRf$?ym1A zY4kry_u70bl6gINLuI%&$677S^4lx!UrBM7W4@;?&UU@8rmmo%IV7l7wlx)ypen{e zNLeQD1D{p?HVG3G*?U_c?H|yAU zpKzrwKzfSYKVj?z@prT8M?Q>71y(|3vFG=b;taI`#o07>dL`&<*UU^|2&~JYMTK~D zv-sj1FJ$D;_9>I?nlB1%diC^|q^WF6d0G%kLvtTHHt4=f2 zAEeE}`Jc6?QbpiYoIV83M+Pl{BHQRn&d9)~c3hW9x^I?1Kpv>e|b-HxgsO10lk-@E7IxKzZP_SrJ2bZ=@($xAFUsvU?;V^_*s6mGXk@YQ>3 zXykdq#9Aw85QC*$VCpG` zRdE%gxUUpv#2b}fw%u`+_e1I1KAxB5ICwDMAk={vW|KB#kb$vN%VN{M#LLTo@rNf* z{vc6I;ur^KmDDQ4&!6f^y;JnIzve#5kPo0LP~6@_)`|k64AoGwew8OUikINN!Cb|T z%|VI6(b;4Iw?mR0Lvu&YUa*~nvjZ;3p}jbmhNH;?0yAj=rXzfOpMhmJ3Z7Fb)$!0) zQ(1WA@MB>@g{q5#=M@f~-WR5n5iwP%Zu zJ7$@j-P@TKO%t$2tnGPuWiUKfN_L+V1knyDe0S_pfrse%SM)f?o9xKnr*$8cWkKvJ&=sAO_n~S(DCDXw`6Ax?CK%q!8RWM zX_RTbXt&w*>C-K+1d+TxJA!C$O{}7D7eA9BBPb|9rg_3DB?3?ngSpJYR*;CKa|q$O zz;(?gW}mYd|FIZK&AUC!0_lc!@5i$EH=>-%**$aqF>IC9dC4~>l^Zt4HaP{4K3e=Z zs%xvI;WV@Jhxj6Wo{5Ni&Z<(q(Z!NG43biW2vID>(6O7FBfTYGuIRSD; z(bI<3`VWv)@mwu_7LaQScAw}PQ1^>XW*G=b*#F{E5pvoWCzU#E%GDrNbyTuvQm4zl z?TX*D{WHVR`}ddU9SXgt6Hw}cmm!Wb3oMH6|IeW!85@NPt=oOLW_~-V-^^mUj z6B`Z#t&}cp%C^}gK0kIostfm&e)n!~pgJ(1n2j2oy^;F`%5=4vhhU+SO`DEUN}U?1 zMN0n22!lpmIkfib{j(<>Wn&Wc{gow#+cwT^G^0=s>%HdmT1;E#LV4ltc_#gPfBD%n 
zJKjk6=k0M4A78R|?E{_|?|G+O>ittL*)Avj{O5m7jE+1|&|aXX z`haAk6i)^&01fFX;A;OW{q=FqP(l{7!3OFCkvvOp;(Fvy?W+B`*PUkLYO;G6FK0yu zkudu_MazaMKC)IRR4#vrc~m#w+}kYF-Z3i5Y3ubLyW3yB9LnEYAG(I_QOtq9qo!Bk zj+;{6op|(cnW^hObGGIieOGphk6Q1Ty?%F{{{3-;mO|;?67H$2soAIg+n~5Ad=n~h zYrfiH`9hPGsH3Kfk;mk)!AZUR%p-4i?-^*Nec|bO=7jFBdBb?-Hf`&MZqtou5n2?` z5`=~T3+%IdP|;{T0%o#Bw-RjfLGXv4ZfXF4jl&ep5lloPLpV><#+~SbamD#WCgI?T z15#;*{)uc0La+i-jf`X^4LN{!P7vkmz%QCrt5y+66iF|))+@HICf}}wumFu*dm&VR zEx&(XFzSV?(gD)yOMFKE(G)`Oc9e`>K}P}hIcBMlL~I8UrJy|osR@DTiAw@PG?_vH z1gQ-q5E05E&F5H~3|a#YJWJt?_K;BgNV?C$fCwTx9OOh~hcBdJIEt!7F45Qt-Jt{< z#abKTRO2Q*1tzk5`1b8w$~t(ik%mtKECI!VDfb76i-5mXnHU@il<965$|K$}L?s9x zOzw;KgOu_d9pJD;jA@BGJUT@IO)h#qPxx{-d-k)FC-^a(V8amW<{f?@+K`(~@E}&Z3ZTGjgP4sdpNV7`hwsUA=V&M)v)Lk+J!3^&RAHyo zlob1p71b4U_H(H>>Y5s=Uhip~?t9D8udk$8r&i=+@j9|K+0kyz#q}}u=_|CyP5Yit zq{AP!z#5oN(XqOQ291IKT;W-_&1w}NHeS}5&bm}-dtB^yy3_PKH&Qz+q))1J?HCbO z4Gs?0vEla2bL6@|G?{d|&^zVjp}ZfW{*(Rb=UOhCF3Ymkc6idV$zbv0`gkXaig}Aj z@h^7*uk-$V7jVDB$m!FkGt*zV54ipk>EB@03}ju%IeYG8%n4et%-j{0<3$eo>gqyi zeZnp|Zr@q<@o#GQBzLK}tdX%YeFx=zTEsr#`U}mAj+;K8@&xsq`~3XfXOfmx$09NPI{4+OAC@AR_0l4wVzSR{_E|KF`hH{Q{$Kl)d8YALYHeRYcbj z=^5Gp28#Hd0xf{A6CLU=y))s_kgpGe7-_q^{@8oM2!Z?XXW#Ni`Jh-u2*xuoZ7js? 
z^$88-w(?U$KgIc;9fyf73Hc|%ueYf*3KTWo*E!{%*3m@3QJ_7aAn|~twl>@5ZLJT7 zV*bP!2Xc?!O?l!lHA=RdGyx+L-@bx|#n!A7cAhuJ^G#0_=)2e7Lkm65;s5*7b(xVp%nM{U8kg*#elLZsk)2qZh zaA5H{(wZ!NieE~ik8&!m2p&&Zh9Gna0}tW_x(70yYW@24I1h*+*jmMPL6Fga|ADlE z<13UwAi0Uh9ah+J5C*`83c+6^pvP>OJ?|6Sb8Xob!%tKR6>+LlMOhgk9P87J9}w{q zV1H80LTV=YEi$gZJCBUFf@PyCG|G@0t%Mi>@;#rWu|~McH-E0uWA8(cb{TvOcnolK z#31(dr4LIr1|}AQ z^-ybGP?WRmFn`W>eIum#_kqMC&)v(zqX=E8{D$Rtiu!OTWN32HP5wuK1Ewxc65JA* z?1@|mPNO8HARIJ|8$M{5<`942pV>C&`<|^OS|+d)xLDGV2(2Q~njvT0BPNFPv(H(X z16*F5U4Xt#49#9u6)CORY_ulc0iQ-@f#G2h0H@YRSrvLCfUSgG#}P+b(#y)4x0e3l zq}{FE z!#x;lXhdMb5QRi$WSpdog9zWdG*B!rA7L-$8=ZL;!5sGtE7 zuSW>Pxg3T}?;oiN3(Ja6M2hGG=?sK^gqsDsip`H(s|y3~YxODYL)fm0=6fhe;H7Ho`H7nAE(=O&~xlOW`7~} zH1pLfS9D>|Nw`*cHk0WisNP%Qgh=usTw~cga|7HRzAJ{@pG#>dgdsv{1S=6TrUun5 z2tr{b);aYO<4IvwqH?tx0_XoB$!PjSOwWa$XI_gmid-;c$of zuSh2i+(*ge1!x`+6sIs94U&2nkOPDv$6CZ?#=VU=;PJZ(8{Z88?I$4kM30nm{YkBl z-)Ve9G8Gm5S%j&@3^#g_f0UZ+g5)Y>aFfVX76=)e5faEngCs}$kH)RNInXaC@;>$V zZ$UZ^CjJD8n-~}bC8#c%nlL8@486!-ah9TaH9s<>oyl>Rm!D|P_lpL7FhmAjKn;R) zsSpXOqPF%SE3SK6z)|9d3_rs!p$GYQ$?St-&1a*XDswKFInpbSz}$-IL$DvYlK*H7 zsJ7w0bEKN2?LlK&8Q5mX9ra*%${;Wb&FdX6FS*3_JuFpwea@22ii3+Ng$bdAxR~!1 z^%xCTkQc1RU;u2NKT`UCgw2txe4pu#pQ?(+uo5u9(1^4UojX7{q87kW--7yG5|Ycq zK<$7arWDYm5rqyc4)KKH(?s49#t;mMHQCpLT@*2<280VrA!e)1ap5j5OEKC7wz6bbwbRQGp?!|NaN^dEu3DWYEGLGU3GC1s9$ zkfmfI0Lof2Ms>IS&=R5xBiF`wxcO1EE9p;kob4ANZv61}L`F`8O{m`1qup_8p?J=qVzM9c~w#-RaRN@&TTf zP&EAh5xu@kU4;PL_(f;$~xM7udtg% zo)7Z9j;Aso%v@I(&EU=?q1(QH^mLr%hYs5Ov%<A1?)z$v|8hh0Hsf@w`Y&9I=gso$q@LdcfV6L>Yy{6IqH2Xf1s}KScL} zE3F;&a1Uw2{>9(d!{Q>h^B5c`g@;Sr03;Bd1exMSN=JlfsPz58bKqK8gXY@Jwn)W( zw;3|nwrlQd5gWUk_@<4?MmmsI_M~tM_uKvB)kTv@Iz_-!G zw$QJvTx*_hT;h|sdBfxUIM$cUFHgR2a}oP+dNp%UR7Pd^1hdYq7H5~#w6y5TlPOEr z9@r4J+I{uOd!Zwn=wEX=CNr;=az3^C+{yc_<_?R81CMMtvgybZsgJXk-(Nhj39%cP zEoy&Zl%zSz<;=Rhfb!3e^G3dhOa8w0y~en#>LY^GNx0b>Dkb38RkDYT*m-|oZ?~Jh zb4fcbSru8I%$C3QeY)OT1vg6N1-^%OHUwc)LCa1hsI~CsQ1UM~$N&{qL2E-Q5;6|0 z^q82l@EI*be_gz{_(Fzs%}NCuVE+nvj`Nk!ZoVuQM}jy%*Ck%V?Kn&W!9(l${ 
zrRvLiGB_``swP!(t5K^LMeGGD!zlc8wEE1huy$;>%2I(0rANU&PAYlnab{T8+K733 zdMd%FOBshH$0L0lX|~w+Lnn(;!tgT>9<;SWdY+64&dlK zV6NjCq%2Y$d11+v!FZU&#pga@`(`KO2j5)D9KO4r&$@IM!wU9QYWGHsl$f7gV~kKm z?;PGN;B&VHys%Wm=st1tv#U~6qF;6DI5CMkYC@K$-GBjIk%(8n4a8^F{R=8U*@+=S zs${Y$FlD^SRX9=I#UVVMlaMi^X&4rc5;yZzp98q>K5qCI$@FK339g?GQ$nA3@9x>6WZsWBq|Ac>Y%O_nn%YH9jRA$Ab54 zJ9Bp_WuZ6YTa@JNM0XT5uevkh%(tCy2iv8@ASE^Ab;@OPoQ^s&IvOunrxd+dB^(Rmd2qBDk?6nM8aax;jhNQ#&#Mu6C*}mC>y%1^Jaq)mC2|z2w&KR zg)bt3*lJHvVXO8`cnf|=Y~;&c+g6YLmAL27(c4s;f@j@H=ces6Zy79Rr|LU~YT`G# z#jD{*s97hz%2^u5@0H*<_%RA4)#TSjqy;MAjFDz;=_&II2}vs4WhWyqKA!?V|2Rmu zZA_MT$RE>sbJ+u0$3y^?a0%~S?=*E6D0xzPqwP8Ly;ML+HLqRT^x`%$5B#|ih8s)E z$yGxW5e+0L*|U-7<;uh9o}=Dh{Bu;MSE=^im{4Wt+U=8YHCQj(ao(18+mTah+BQ*u z$wh6@+bw}CTMfJ!ND>|-`mwkdA)*S}A?)AWb~_%3{t;_8n4<>P(PwkUOd?n1yUKUR z4>&mW_zmd1JzW}XZE;K8D@!vd*Z8^PQK#I31RZLB)yqcI<gR5Ii= zwl7F|t&ChwZ4kqway8K?9OF??(#S#}0fC^F9#Ti#KI!84>%1w*#l}`Z z9*hX#oj;`@>s7yTBLO2O3W|%-PkT^UxE>AcE!%h@s(CJEmlhZZS2A_*Gl>xIJ`D&k zC>0%j$bY@!^9#A?N4v!vf$12)v1SVslQD1ObV zx>JERSZIv_Sf~sQA z7oxB6I*OMGI3cJ*FHq8LQBT92eF?$x4Il>)eM5l;#l^7dGV$$0y>)fsJQMt6xTuUv z74!V>2?q#oP1W#eFs7LtNWEt_S-&_roHxF3K3Y_tcXXtP3F9ljZ5jAw)Qt^eJAbgQ z?o}W!_cYJ7{IwdELzliBlZ4a7Wz_I|yl)YDdK=QI22IoavFex13~h{5`3go_d3c}m z8hGV@{#*-qsHC{K-%0~Ypag8P5Fxa_+<;He$i=rfP^!^rmsh<;*>Pe#OW7J`T(5Cx zsb2L=9Fs}i*m5S3`y$Z@0HN|kF>fhQxNq47tHK>QX)Fis^(u6Qr+ww- z8sk$vL)B0ls1;hA%?nB1y-QGGpf>B+%ojfMS5;2EiPBV^y8E}fBO1abC%jGbQeDEs z&Ggqsq9G&bQ_GP3mS=$n)!%#`9i8UC&TJM(<>%ZOLA{Muc(RQFN#%6PoIN{a$hdyT z(LR^^7y_6hM%xzmrF0=d@z;uQ-JQFf@A@g0AQR|$B7ba+CF2R$W5+%i;N>l4@)n1J zFi|H@8?+Rf+@C@B6bo%1$2+!m;Ojt6qaaAMGp1^!Cv|lJPt`;RhqSEh`1U7$evyEe z&iR7x?644Kpr>!C69@Z;{jau&Hum|TPu&*MGu@q>QD`TKB4^c-M^!b#Shao){S%qD zlr1b$$h8;1g;Nt%M-qHvkBZI$lqrYQ;n-+*{)^{7 zINpR%UmTEv+%uLn_T|gtNNRq7if88?owZ)IG~ZK8cvPY!VRvH2amoF8YUN)CJKWus zZddY~9laL&+I?jhovbfF%)Qhu$Y83ms#BqYky1i?I&zJwFo{*Aj83C6u?Q8)@SrHe z0_o>JwqaaDJ;t0{(@l75%sZ={BCp&Q#<}N9kxtEJ_Kx)?X<2p%39&FjE{^G8(M<~z z>GcRu2p&oJep(jx%w~p?HC(3I)Z65HBli8e`H5D0wv-z05bHUYeS26h)JT)=8l%oi 
zxP(wq$%$9$F89x~j^j4g&d@xRv$!xj1E|R#j|^Br3>qdrtWzY~x5yFG3n>8D(G_Z= zB~rK>{GTC|@(}kNvhj-Z9M=T0s-UYIRxCs3))w9d%z+A@BaFvYx1H&Z_J8su0;jvq zNak^f3;NSLI)4Gg^c8=%;n3G6%`d|Bt1awJ+|`iE{rk16Et|!F*jtk zhqGVMQS*9wV_eMIqXL{9Vq#j5$HMXG2f)yjn=Kz7?24p!e{r##|J#m1&JgHx&nZRi zNK)|$y*sY1{^RD4tZ(lzeUO_szRDZ%XWD)@jz^U8n4}s8?9>o?M3|MA0q590b~Smd zDLf^mnfn|rijGM*5|zK!Yi<#dte?}PqoY-zkfci#Rg#|?!W$O433UD@(M$x}EepW& z>3*tVQVx586#MBB8n35I;-2HqK7Ibyo?pXgrneMsXnWi~g`sC-9~&E<*p(T?qcPwd zt2$Pq}^kh1*6UYYMOcgX)l zzokW_@r~m>_p_3G-7&U_Gx3f5>8jtZ-eSjxz_weD#Bg4_P#j0K zIe^u3zI^cgfA?!KCukaNj23!eTj~}T=X<9|ZZs)kp)ejVYT@_G$;I{YYVua65UCYgluOIbjIlS#j&y=NC z(~N)Ze*c7hG>0i#GZDzRCX2m1I|G55bmJ65!_89=-(n5R{-h6+Cy|AgR3UgJ5i2|=#LdmE7dng(doC#e?Gx?gI66MeBqUtVU|nGtH8gY+sbMTq1>4_YZhns+ zza&iue6KW2-bq4@S&18EiDzX9ys93yEfBrLpltwee;l{Q8`mENjZQowEjs^0Va6H@ zlaAx01<&A6z_%!Pu*D;#^NmE-LEggC!^0BenjU{O?ee|eqT$#SJ+NB}V4d&ab}ENsY_{B6PVP`?@nDV+tUpp~f-S&x zk!KLOxJ$Qr&^FghubcfA;CDrgbbulv0pwW^GKm?eaVoQI%;gn>0nxfz)5Qet)TK8& zo%)!!XS-0ZfWO7?k3|V2i#=d_2sw&;rC0{=Z8?(1W}*7{vE3a|V<7K7<-2I}ijTP2 zcF*)uYd})--)cPt5L^}&K_$pp4lR;LPJXnEF;{G;T)#dbDsqb7$|72Bdw-$&d*88^ z$9tCOEGk*xt=jCy%3{2jRH*IOYU3uno#rF}IX?N(tqk_G3Z4w5{7$7*DbsfyPo=w* z1HP#YV41)*C++8LbO|INtH#Ju^&U71u3X3X5>sMNg5j79%EOAd2$y1VY~#q$0q5ZN zt)l`kdnzgyXM3;dqS6KXYRxT0*;_+G;f!|h*s0l6P zZZk;;BZF0UVB8Y$>z_El_3<&`QIV0g@Rl$JeWR+Wc|&|*DgtxHD!>a}RnxNcf3!=i z9xQ^WIXn{8App1mVboFkshf6FEp8-T?6k@f(=U2XCeLEPpU9KT-l8M!Ok%c`@Z2y0 zfpb`DqV)kcJP{S6^vA7(k>nybd7))q!e5O?NM0jjdOE!j_3BkUnw4Cpwk}mrM0Ln7 zMe3`gN2`JD>mqfewY+fkY8qrdJ5XHFOQf3dwlgse)cmx0P`rh%5jzCfzg8Py6CEpZG#|UQDm@1GRalCR^44fPeTs9h4KBl;H9hG z38J8;dF$S;+s#X`!uTs&maCYVy&)|VW~L97b_52Zc`XHL700_yKFZ=cy%*&xcjJ zD}mhE#h$+rQFJnKs3vcLzj6=r7p&0u29vBuEZbwQPE{}-e|@jYIDF{7v7_NekzQBf zoy^V1V&3;uW+ZPEu_)^kv&&8sNIHM!E6!tqUzxF%cUSK5q>I@G)RDg!6`z>E367k` zE5@m}+D_xC_GI=Z{lLeM)ft4|6K18Pe(CU&zY~-oXW^~j=FDLx)Iq*ehIV#0+rio_ zwWL-(Ev9X;euuQi8U}tNRY}u%(AWnHqc~&{&2Byl|+m zv0JYpu7uIC$G{094m6nq_zHq&qFG9UX`}PSR6op?}CN_b>TURHg(QdzXQRd;6 
zpN?6MIKj9ZZ?|2k;z}A2;^*Z(Pof-9K)=vXDr(lR9cK50xnA5gVqEqQzIy!N26rPy zb61|faO?4n0g-=SQv6=(uKWA?@u^RYe_txN-2d;tOsoIzzgzPk^p+y^-{|c=+{eFv zgSGtd%D*rFN5An$P7%`G%V$3|bo}$uzgL`b3JqRKV>|r~87HwSD*i>MDIBKY(Ru)- zMRneM7P#WTu7B@GN$R8j+++&ttsf%){=xr@8#N_ + + + + + +G + + +cluster_legend + +Graphkit Legend + + + +operation + +operation + + + +graphop + +graph operation + + + + +insteps + +execution step + + + + +executed + +executed + + + + +data + +data + + + +input + +input + + + + +output + +output + + + + +inp_out + +inp+out + + + + +evicted + +evicted + + + + +pinned + +pinned + + + + +evpin + +evict+pin + + + + +sol + +in solution + + + + + +e2 + +dependency + + + +e1->e2 + + + + + +e3 + +optional + + + +e2->e3 + + + + + +e4 + +pruned dependency + + + +e3->e4 + + + + + +e5 + +execution sequence + + + +e4->e5 + + +1 + + + diff --git a/docs/source/index.rst b/docs/source/index.rst index 6b5cb690..5253b6e5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -71,8 +71,7 @@ As you can see, any function can be used as an operation in GraphKit, even ones For debugging, you may plot the workflow with one of these methods:: - graph.net.plot(show=True) # open a matplotlib window - graph.net.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg +.. figure:: images/GraphkitLegend.svg .. NOTE:: For plots, ``graphviz`` must be in your PATH, and ``pydot` & ``matplotlib`` python packages installed. diff --git a/graphkit/plot.py b/graphkit/plot.py index 057faf0d..2d97e612 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -60,12 +60,17 @@ def plot(self, filename=None, show=False, **kws): **Legend:** + . figure:: ../images/Graphkitlegend.svg + :alt: Graphkit Legend + + see :func:`legend()` + *NODES:* oval function - circle - subgraph function + egg + subgraph operation house given input inversed-house @@ -76,10 +81,12 @@ def plot(self, filename=None, show=False, **kws): intermediate data, neither given nor asked. 
red frame delete-instruction, to free up memory. + blue frame + pinned-instruction, not to overwrite intermediate inputs. filled data node has a value in `solution` OR function has been executed. thick frame - function/data node in `steps`. + function/data node in execution `steps`. *ARROWS* @@ -88,10 +95,10 @@ def plot(self, filename=None, show=False, **kws): sources-operations ``provide`` target-data) dashed black arrows optional needs + wheat arrows + broken dependency (``provide``) during pruning green-dotted arrows execution steps labeled in succession - wheat arrows - broken provides during pruning **Sample code:** @@ -239,7 +246,7 @@ def get_node_name(a): if steps and nx_node in steps: kw["penwdth"] = steps_thickness - shape = "oval" if isinstance(nx_node, NetworkOperation) else "oval" + shape = "egg" if isinstance(nx_node, NetworkOperation) else "oval" if executed and nx_node in executed: kw["style"] = "filled" kw["fillcolor"] = fill_color @@ -366,32 +373,29 @@ def legend(filename=None, show=None): label="Graphkit Legend"; operation [shape=oval]; - pipeline [shape=circle]; - insteps [penwidth=3 label="in steps"]; + graphop [shape=egg label="graph operation"]; + insteps [penwidth=3 label="execution step"]; executed [style=filled fillcolor=wheat]; - operation -> pipeline -> insteps -> executed [style=invis]; + operation -> graphop -> insteps -> executed [style=invis]; data [shape=rect]; input [shape=invhouse]; output [shape=house]; inp_out [shape=hexagon label="inp+out"]; evicted [shape=rect penwidth=3 color="#990000"]; - pinned [shape=rect penwidth=3 color="purple"]; + pinned [shape=rect penwidth=3 color="blue"]; evpin [shape=rect penwidth=3 color=purple label="evict+pin"]; sol [shape=rect style=filled fillcolor=wheat label="in solution"]; data -> input -> output -> inp_out -> evicted -> pinned -> evpin -> sol [style=invis]; - a1 [style=invis] b1 [color=invis label="dependency"]; - a1 -> b1; - a2 [style=invis] b2 [color=invis label="optional"]; - a2 -> b2 
[style=dashed]; - a3 [style=invis] b3 [color=invis penwidth=3 label="broken dependency"]; - a3 -> b3 [color=wheat penwidth=2]; - a4 [style=invis] b4 [color=invis penwidth=4 label="steps sequence"]; - a4 -> b4 [color="#009999" penwidth=4 style=dotted arrowhead=vee]; - b1 -> a2 [style=invis]; - b2 -> a3 [style=invis]; - b3 -> a4 [style=invis]; + e1 [style=invis] e2 [color=invis label="dependency"]; + e1 -> e2; + e3 [color=invis label="optional"]; + e2 -> e3 [style=dashed]; + e4 [color=invis penwidth=3 label="pruned dependency"]; + e3 -> e4 [color=wheat penwidth=2]; + e5 [color=invis penwidth=4 label="execution sequence"]; + e4 -> e5 [color="#009999" penwidth=4 style=dotted arrowhead=vee label=1 fontcolor="#009999"]; } } """ From 5cf7189fc621f16dd7520311f9dd53bd941fd162 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 13:44:35 +0300 Subject: [PATCH 091/167] FIX(DOC): don't use `graph` in sample code, crash DOT, +more ... + FIX: `graph` is a DOT keyword, eg pydot/pydot#111 + Replaced `graph` --> `graphop`, inline with future refactoring. + Refactored example code. + New "Plotting" section under Quickstart. + Copy README instructions to main docs. + Abandoned `example_graph.svg`, replaced with `intro.svg`. + Recommend `.png` --> `.svg`, to save storage space. - discovered BUG in MERGE saumple code (empty!). 
--- README.md | 32 ++++--- docs/source/graph_composition.rst | 28 +++--- docs/source/images/intro.svg | 143 ++++++++++++++++++++++++++++++ docs/source/index.rst | 45 +++++++--- docs/source/operations.rst | 14 +-- graphkit/network.py | 2 +- graphkit/plot.py | 11 +-- 7 files changed, 223 insertions(+), 52 deletions(-) create mode 100644 docs/source/images/intro.svg diff --git a/README.md b/README.md index 21dca0ef..81b1657d 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,12 @@ GraphKit is a lightweight Python module for creating and running ordered graphs Here's how to install: -``` -pip install graphkit -``` + pip install graphkit + +OR with dependencies for plotting support (and you need to install [`Graphviz`](https://graphviz.org) +program separately with your OS tools):: + + pip install graphkit[plot] Here's a Python script with an example GraphKit computation graph that produces multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): @@ -32,20 +35,20 @@ def abspow(a, p): return c # Compose the mul, sub, and abspow operations into a computation graph. -graph = compose(name="graph")( +graphop = compose(name="graphop")( operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) ) # Run the graph and request all of the outputs. -out = graph({'a': 2, 'b': 5}) +out = graphop({'a': 2, 'b': 5}) # Prints "{'a': 2, 'a_minus_ab': -8, 'b': 5, 'ab': 10, 'abs_a_minus_ab_cubed': 512}". print(out) # Run the graph and request a subset of the outputs. -out = graph({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) +out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) # Prints "{'a_minus_ab': -8}". print(out) @@ -53,19 +56,20 @@ print(out) As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! 
-For debugging, you may plot the workflow with one of these methods: + +## Plotting + +For debugging the above graph-operation you may plot it using these methods: ```python - graph.plot(show=True) # open a matplotlib window - graph.plot("path/to/workflow.png") # supported files: .png .dot .jpg .jpeg .pdf .svg + graphop.plot(show=True, solution=out) # open a matplotlib window with solution values in nodes + graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... ``` + +![Intro graph](docs/source/images/intro.png "Intro graph") ![Graphkit Legend](docs/source/images/GraphkitLegend.svg "Graphkit Legend") -> **NOTE**: For plots, `graphviz` must be in your PATH, and `pydot` & `matplotlib` python packages installed. -> You may install both when installing *graphkit* with its `plot` extras: -> ```python -> pip install graphkit[plot] -> ``` +> **TIP:** The `pydot.Dot` instances returned by `plot()` are rendered as SVG in *Jupyter/IPython*. # License diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index ba428a14..1d8e9f6d 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -30,15 +30,15 @@ The simplest use case for ``compose`` is assembling a collection of individual o return c # Compose the mul, sub, and abspow operations into a computation graph. 
- graph = compose(name="graph")( + graphop = compose(name="graphop")( operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) ) -The call here to ``compose()()`` yields a runnable computation graph that looks like this (where the circles are operations, squares are data, and octagons are parameters): +The call here to ``compose()`` yields a runnable computation graph that looks like this (where the circles are operations, squares are data, and octagons are parameters): -.. image:: images/example_graph.svg +.. image:: images/intro.svg .. _graph-computations: @@ -49,7 +49,7 @@ Running a computation graph The graph composed in the example above in :ref:`simple-graph-composition` can be run by simply calling it with a dictionary argument whose keys correspond to the names of inputs to the graph and whose values are the corresponding input values. For example, if ``graph`` is as defined above, we can run it like this:: # Run the graph and request all of the outputs. - out = graph({'a': 2, 'b': 5}) + out = graphop({'a': 2, 'b': 5}) # Prints "{'a': 2, 'a_minus_ab': -8, 'b': 5, 'ab': 10, 'abs_a_minus_ab_cubed': 512}". print(out) @@ -57,10 +57,10 @@ The graph composed in the example above in :ref:`simple-graph-composition` can b Producing a subset of outputs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -By default, calling a graph on a set of inputs will yield all of that graph's outputs. You can use the ``outputs`` parameter to request only a subset. For example, if ``graph`` is as above:: +By default, calling a graph-operation on a set of inputs will yield all of that graph's outputs. You can use the ``outputs`` parameter to request only a subset. For example, if ``graphop`` is as above:: - # Run the graph and request a subset of the outputs. 
- out = graph({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) + # Run the graph-operation and request a subset of the outputs. + out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) # Prints "{'a_minus_ab': -8}". print(out) @@ -70,17 +70,17 @@ When using ``outputs`` to request only a subset of a graph's outputs, GraphKit e Short-circuiting a graph computation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You can short-circuit a graph computation, making certain inputs unnecessary, by providing a value in the graph that is further downstream in the graph than those inputs. For example, in the graph we've been working with, you could provide the value of ``a_minus_ab`` to make the inputs ``a`` and ``b`` unnecessary:: +You can short-circuit a graph computation, making certain inputs unnecessary, by providing a value in the graph that is further downstream in the graph than those inputs. For example, in the graph-operation we've been working with, you could provide the value of ``a_minus_ab`` to make the inputs ``a`` and ``b`` unnecessary:: - # Run the graph and request a subset of the outputs. - out = graph({'a_minus_ab': -8}) + # Run the graph-operation and request a subset of the outputs. + out = graphop({'a_minus_ab': -8}) # Prints "{'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512}". print(out) When you do this, any ``operation`` nodes that are not on a path from the downstream input to the requested outputs (i.e. predecessors of the downstream input) are not computed. For example, the ``mul1`` and ``sub1`` operations are not executed here. -This can be useful if you have a graph that accepts alternative forms of the same input. For example, if your graph requires a ``PIL.Image`` as input, you could allow your graph to be run in an API server by adding an earlier ``operation`` that accepts as input a string of raw image data and converts that data into the needed ``PIL.Image``. 
Then, you can either provide the raw image data string as input, or you can provide the ``PIL.Image`` if you have it and skip providing the image data string. +This can be useful if you have a graph-operation that accepts alternative forms of the same input. For example, if your graph-operation requires a ``PIL.Image`` as input, you could allow your graph to be run in an API server by adding an earlier ``operation`` that accepts as input a string of raw image data and converts that data into the needed ``PIL.Image``. Then, you can either provide the raw image data string as input, or you can provide the ``PIL.Image`` if you have it and skip providing the image data string. Adding on to an existing computation graph ------------------------------------------ @@ -109,7 +109,7 @@ Sometimes you will have two computation graphs---perhaps ones that share operati combined_graph = compose(name="combined_graph")(graph1, graph2) -However, if you want to combine graphs that share operations and don't want to pay the price of running redundant computations, you can set the ``merge`` parameter of ``compose()`` to ``True``. This will consolidate redundant ``operation`` nodes (based on ``name``) into a single node. For example, let's say we have ``graph``, as in the examples above, along with this graph:: +However, if you want to combine graphs that share operations and don't want to pay the price of running redundant computations, you can set the ``merge`` parameter of ``compose()`` to ``True``. This will consolidate redundant ``operation`` nodes (based on ``name``) into a single node. For example, let's say we have ``graphop``, as in the examples above, along with this graph:: # This graph shares the "mul1" operation with graph. 
another_graph = compose(name="another_graph")( @@ -117,9 +117,9 @@ However, if you want to combine graphs that share operations and don't want to p operation(name="mul2", needs=["c", "ab"], provides=["cab"])(mul) ) -We can merge ``graph`` and ``another_graph`` like so, avoiding a redundant ``mul1`` operation:: +We can merge ``graphop`` and ``another_graph`` like so, avoiding a redundant ``mul1`` operation:: - merged_graph = compose(name="merged_graph", merge=True)(graph, another_graph) + merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) This ``merged_graph`` will look like this: diff --git a/docs/source/images/intro.svg b/docs/source/images/intro.svg new file mode 100644 index 00000000..4469543f --- /dev/null +++ b/docs/source/images/intro.svg @@ -0,0 +1,143 @@ + + + + + + +G + +graphop + +cluster_after prunning + +after prunning + + + +abspow1 + +abspow1 + + + +abs_a_minus_ab_cubed + +abs_a_minus_ab_cubed + + + +abspow1->abs_a_minus_ab_cubed + + + + + +a + +a + + + +mul1 + +mul1 + + + +a->mul1 + + + + + +ab + +ab + + + +a->ab + + +4 + + + +sub1 + +sub1 + + + +a->sub1 + + + + + +b + +b + + + +mul1->b + + +1 + + + +mul1->ab + + + + + +b->mul1 + + + + + +b->sub1 + + +2 + + + +ab->sub1 + + + + + +sub1->a + + +3 + + + +a_minus_ab + +a_minus_ab + + + +sub1->a_minus_ab + + + + + +a_minus_ab->abspow1 + + + + + diff --git a/docs/source/index.rst b/docs/source/index.rst index 5253b6e5..f542da58 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -38,6 +38,12 @@ Here's how to install:: pip install graphkit +OR with dependencies for plotting support (and you need to install `Graphviz +`_ program separately with your OS tools):: + + pip install graphkit[plot] + + Here's a Python script with an example GraphKit computation graph that produces multiple outputs (``a * b``, ``a - a * b``, and ``abs(a - a * b) ** 3``):: from operator import mul, sub @@ -49,36 +55,53 @@ Here's a Python script with an example GraphKit computation graph that 
produces return c # Compose the mul, sub, and abspow operations into a computation graph. - graph = compose(name="graph")( + graphop = compose(name="graphop")( operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) ) - # Run the graph and request all of the outputs. - out = graph({'a': 2, 'b': 5}) + # Run the graph-operation and request all of the outputs. + out = graphop({'a': 2, 'b': 5}) # Prints "{'a': 2, 'a_minus_ab': -8, 'b': 5, 'ab': 10, 'abs_a_minus_ab_cubed': 512}". print(out) - # Run the graph and request a subset of the outputs. - out = graph({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) + # Run the graph-operation and request a subset of the outputs. + out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) # Prints "{'a_minus_ab': -8}". print(out) As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! -For debugging, you may plot the workflow with one of these methods:: + +Plotting +-------- + +For debugging the above graph-operation you may plot it using these methods:: + + graphop.plot(show=True, solution=out) # open a matplotlib window with solution values in nodes + graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... + +.. image:: images/intro.svg + :alt: Intro graph .. figure:: images/GraphkitLegend.svg + :alt: Graphkit Legend -.. NOTE:: - For plots, ``graphviz`` must be in your PATH, and ``pydot` & ``matplotlib`` python packages installed. - You may install both when installing *graphkit* with its `plot` extras:: - - pip install graphkit[plot] + The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. + +.. Tip:: + The ``pydot.Dot`` instances returned by ``plot()`` are rendered + directly in *Jupyter/IPython* notebooks as SVG images. +.. 
NOTE:: + For plots, `Graphviz `_ program must be in your PATH, + and ``pydot`` & ``matplotlib`` python packages installed. + You may install both when installing ``graphkit`` with its ``plot`` extras:: + + pip install graphkit[plot] License ------- diff --git a/docs/source/operations.rst b/docs/source/operations.rst index b7b4dbad..fbd6dea2 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -51,15 +51,15 @@ Let's look again at the operations from the script in :ref:`quick-start`, for ex return c # Compose the mul, sub, and abspow operations into a computation graph. - graph = compose(name="graph")( + graphop = compose(name="graphop")( operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) ) -The ``needs`` and ``provides`` arguments to the operations in this script define a computation graph that looks like this (where the circles are operations, squares are data, and octagons are parameters): +The ``needs`` and ``provides`` arguments to the operations in this script define a computation graph that looks like this (where the oval are operations, squares/houses are data): -.. image:: images/example_graph.svg +.. 
image:: images/intro.svg Constant operation parameters: ``params`` @@ -86,7 +86,7 @@ If you are defining your computation graph and the functions that comprise it al def foo(a, b, c): return c * (a + b) - graph = compose(name='foo_graph')(foo) + graphop = compose(name='foo_graph')(foo) Functional specification ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -99,7 +99,7 @@ If the functions underlying your computation graph operations are defined elsewh add_op = operation(name='add_op', needs=['a', 'b'], provides='sum')(add) mul_op = operation(name='mul_op', needs=['c', 'sum'], provides='product')(mul) - graph = compose(name='add_mul_graph')(add_op, mul_op) + graphop = compose(name='add_mul_graph')(add_op, mul_op) The functional specification is also useful if you want to create multiple ``operation`` instances from the same function, perhaps with different parameter values, e.g.:: @@ -111,7 +111,7 @@ The functional specification is also useful if you want to create multiple ``ope pow_op1 = operation(name='pow_op1', needs=['a'], provides='a_squared')(mypow) pow_op2 = operation(name='pow_op2', needs=['a'], params={'p': 3}, provides='a_cubed')(mypow) - graph = compose(name='two_pows_graph')(pow_op1, pow_op2) + graphop = compose(name='two_pows_graph')(pow_op1, pow_op2) A slightly different approach can be used here to accomplish the same effect by creating an operation "factory":: @@ -125,7 +125,7 @@ A slightly different approach can be used here to accomplish the same effect by pow_op1 = pow_op_factory(name='pow_op1', needs=['a'], provides='a_squared') pow_op2 = pow_op_factory(name='pow_op2', needs=['a'], params={'p': 3}, provides='a_cubed') - graph = compose(name='two_pows_graph')(pow_op1, pow_op2) + graphop = compose(name='two_pows_graph')(pow_op1, pow_op2) Modifiers on ``operation`` inputs and outputs diff --git a/graphkit/network.py b/graphkit/network.py index 3f7a607b..a0dc7663 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -110,7 +110,7 @@ class 
DeleteInstruction(str): It's a step in :attr:`ExecutionPlan.steps` for the data-node `str` that frees its data-value from `solution` after it is no longer needed, - to reduce memory footprint while computing the pipeline. + to reduce memory footprint while computing the graph. """ def __repr__(self): return 'DeleteInstruction("%s")' % self diff --git a/graphkit/plot.py b/graphkit/plot.py index 2d97e612..69138d06 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -105,17 +105,18 @@ def plot(self, filename=None, show=False, **kws): >>> from graphkit import compose, operation >>> from graphkit.modifiers import optional - >>> pipeline = compose(name="pipeline")( + >>> graphop = compose(name="graphop")( ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), ... operation(name="sub", needs=["a", optional("b2")], provides=["ab2"])(lambda a, b=1: a-b), ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), ... ) + >>> graphop.plot(show=True); # plot just the graph in a matplotlib window >>> inputs = {'a': 1, 'b1': 2} - >>> solution=pipeline(inputs) + >>> solution = graphop(inputs) # now plots will include the execution-plan - >>> pipeline.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); - >>> pipeline.last_plan.plot('plot2.svg', solution=solution); + >>> graphop.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); + >>> graphop.plot(solution=solution) # just get the `pydoit.Dot` object, renderable in Jupyter """ dot = self._build_pydot(**kws) return render_pydot(dot, filename=filename, show=show) @@ -303,7 +304,7 @@ def get_node_name(a): def supported_plot_formats(): - """return automatically all `pydot` extensions withlike ``.png``""" + """return automatically all `pydot` extensions""" import pydot return [".%s" % f for f in pydot.Dot().formats] From 7f637fe82353b607e4fc230e7ec6952f071314f8 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 13:47:10 +0300 
Subject: [PATCH 092/167] chore(git): .gitignore plot images in root folder --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index db4561ea..ce3d241b 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,8 @@ docs/_build/ # PyBuilder target/ + +# Plots genersated when running sample code +/*.png +/*.svg +/*.pdf \ No newline at end of file From ae0116356d18ab9116395559cfe4cca69aef384b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 14:51:51 +0300 Subject: [PATCH 093/167] chore(TCs): mark SLOW tests, when in hurry, +`setup.cfg` ... to adopt pytest args + mark wheel a universal. --- .travis.yml | 2 ++ setup.cfg | 10 ++++++++++ test/test_graphkit.py | 2 ++ test/test_plot.py | 1 + 4 files changed, 15 insertions(+) create mode 100644 setup.cfg diff --git a/.travis.yml b/.travis.yml index cbd8cf82..025017a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,8 @@ install: script: - pytest -v --cov=graphkit + # In case you adopt -m 'not slow' in setup.cfg. 
+ #- pytest -vm slow --cov=graphkit deploy: provider: pypi diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..2e5ce9fc --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +## Python's setup.cfg for tool defaults: +# +[bdist_wheel] +universal = 1 + + +[tool:pytest] +# See http://doc.pytest.org/en/latest/mark.html#mark +markers = + slow: marks tests as slow, select them with `-m slow` or `-m 'not slow'` diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 693addd0..1429c4b2 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -578,6 +578,7 @@ def count_deletions(steps): assert count_deletions(steps12) != count_deletions(steps22) +@pytest.mark.slow def test_parallel_execution(): import time @@ -633,6 +634,7 @@ def fn3(z, k=1): # make sure results are the same using either method assert result_sequential == result_threaded +@pytest.mark.slow def test_multi_threading(): import time import random diff --git a/test/test_plot.py b/test/test_plot.py index 37b3c211..d17201cb 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -52,6 +52,7 @@ def test_plotting_docstring(): assert ext in network.Network.plot.__doc__ +@pytest.mark.slow def test_plot_formats(pipeline, tmp_path): ## Generate all formats (not needing to save files) From 64838a598de706455807541d282b0d911612894d Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 18:24:19 +0300 Subject: [PATCH 094/167] FIX(TCs): MERGE TCs were not ASSERTING... got values from v1.2.4 - ALL MERGE TCs FAIL! 
--- test/test_graphkit.py | 78 +++++++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 17 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 1429c4b2..dc9e0a6d 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -33,13 +33,13 @@ def filtdict(d, *keys): return type(d)(i for i in d.items() if i[0] in keys) -def test_network(): +def test_network_smoke(): # Sum operation, late-bind compute function sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum_ab')(add) # sum_op1 is callable - print(sum_op1(1, 2)) + assert sum_op1(1, 2) == 3 # Multiply operation, decorate in-place @operation(name='mul_op1', needs=['sum_ab', 'b'], provides='sum_ab_times_b') @@ -47,14 +47,14 @@ def mul_op1(a, b): return a * b # mul_op1 is callable - print(mul_op1(1, 2)) + assert mul_op1(1, 2) == 2 # Pow operation @operation(name='pow_op1', needs='sum_ab', provides=['sum_ab_p1', 'sum_ab_p2', 'sum_ab_p3'], params={'exponent': 3}) def pow_op1(a, exponent=2): return [math.pow(a, y) for y in range(1, exponent+1)] - print(pow_op1._compute({'sum_ab':2}, ['sum_ab_p2'])) + assert pow_op1._compute({'sum_ab':2}, ['sum_ab_p2']) == {'sum_ab_p2': 4.0} # Partial operation that is bound at a later time partial_op = operation(name='sum_op2', needs=['sum_ab_p1', 'sum_ab_p2'], provides='p1_plus_p2') @@ -68,7 +68,7 @@ def pow_op1(a, exponent=2): sum_op3 = sum_op_factory(name='sum_op3', needs=['a', 'b'], provides='sum_ab2') # sum_op3 is callable - print(sum_op3(5, 6)) + assert sum_op3(5, 6) == 11 # compose network net = compose(name='my network')(sum_op1, mul_op1, pow_op1, sum_op2, sum_op3) @@ -77,14 +77,25 @@ def pow_op1(a, exponent=2): # Running the network # - # # get all outputs - # pprint(net({'a': 1, 'b': 2})) + # get all outputs + exp = {'a': 1, + 'b': 2, + 'p1_plus_p2': 12.0, + 'sum_ab': 3, + 'sum_ab2': 3, + 'sum_ab_p1': 3.0, + 'sum_ab_p2': 9.0, + 'sum_ab_p3': 27.0, + 'sum_ab_times_b': 6} + assert net({'a': 1, 'b': 2}) == exp - # # 
get specific outputs - # pprint(net({'a': 1, 'b': 2}, outputs=["sum_ab_times_b"])) + # get specific outputs + exp = {'sum_ab_times_b': 6} + assert net({'a': 1, 'b': 2}, outputs=["sum_ab_times_b"]) == exp # start with inputs already computed - pprint(net({"sum_ab": 1, "b": 2}, outputs=["sum_ab_times_b"])) + exp = {'sum_ab_times_b': 2} + assert net({"sum_ab": 1, "b": 2}, outputs=["sum_ab_times_b"]) == exp # visualize network graph # net.plot(show=True) @@ -96,15 +107,23 @@ def test_network_simple_merge(): sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) - pprint(net1({'a': 1, 'b': 2, 'c': 4})) + + exp = {'a': 1, 'b': 2, 'c': 4, 'sum1': 3, 'sum2': 3, 'sum3': 7} + sol = net1({'a': 1, 'b': 2, 'c': 4}) + assert sol == exp sum_op4 = operation(name='sum_op1', needs=['d', 'e'], provides='a')(add) sum_op5 = operation(name='sum_op2', needs=['a', 'f'], provides='b')(add) + net2 = compose(name='my network 2')(sum_op4, sum_op5) - pprint(net2({'d': 1, 'e': 2, 'f': 4})) + exp = {'a': 3, 'b': 7, 'd': 1, 'e': 2, 'f': 4} + sol = net2({'d': 1, 'e': 2, 'f': 4}) + assert sol == exp net3 = compose(name='merged')(net1, net2) - pprint(net3({'c': 5, 'd': 1, 'e': 2, 'f': 4})) + exp = {'a': 3, 'b': 7, 'c': 5, 'd': 1, 'e': 2, 'f': 4, 'sum1': 10, 'sum2': 10, 'sum3': 15} + sol = net3({'c': 5, 'd': 1, 'e': 2, 'f': 4}) + assert sol == exp def test_network_deep_merge(): @@ -113,15 +132,40 @@ def test_network_deep_merge(): sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) - pprint(net1({'a': 1, 'b': 2, 'c': 4})) + + exp = {'a': 1, 'b': 2, 'c': 4, 'sum1': 3, 'sum2': 3, 'sum3': 7} + assert net1({'a': 1, 'b': 2, 'c': 4}) == exp sum_op4 = operation(name='sum_op1', 
needs=['a', 'b'], provides='sum1')(add) sum_op5 = operation(name='sum_op4', needs=['sum1', 'b'], provides='sum2')(add) net2 = compose(name='my network 2')(sum_op4, sum_op5) - pprint(net2({'a': 1, 'b': 2})) + exp = {'a': 1, 'b': 2, 'sum1': 3, 'sum2': 5} + assert net2({'a': 1, 'b': 2}) == exp net3 = compose(name='merged', merge=True)(net1, net2) - pprint(net3({'a': 1, 'b': 2, 'c': 4})) + exp = {'a': 1, 'b': 2, 'c': 4, 'sum1': 3, 'sum2': 3, 'sum3': 7} + assert net3({'a': 1, 'b': 2, 'c': 4}) == exp + + +def test_network_merge_in_doctests(): + def abspow(a, p): + c = abs(a) ** p + return c + + graphop = compose(name="graphop")( + operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), + operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3}) + (abspow) + ) + + another_graph = compose(name="another_graph")( + operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + operation(name="mul2", needs=["c", "ab"], provides=["cab"])(mul) + ) + merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) + assert merged_graph.needs + assert merged_graph.provides def test_input_based_pruning(): @@ -293,7 +337,7 @@ def test_pruning_multiouts_not_override_intermediates1(): def test_pruning_multiouts_not_override_intermediates2(): # Test #25: v.1.2.4 overrides intermediate data when a previous operation # must run for its other outputs (outputs asked or not) - # SPURIOUS FAILS in < PY3.6 due to unordered dicts, + # SPURIOUS FAILS in < PY3.6 due to unordered dicts, # eg https://travis-ci.org/ankostis/graphkit/jobs/594813119 pipeline = compose(name="pipeline")( operation(name="must run", needs=["a"], provides=["overriden", "e"]) From 8c410664a4bdc4f3bcda1576b9006068566ff257 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 18:28:20 +0300 Subject: [PATCH 095/167] FIX(MERGE): broken by NEW_DAG_SOLVER (#26 ... 
many commits ago. Never got it bc TC were not checking merges! --- graphkit/functional.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/graphkit/functional.py b/graphkit/functional.py index 84f92f84..5b3735fe 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -1,9 +1,7 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. - -from itertools import chain - from boltons.setutils import IndexedSet as iset +import networkx as nx from .base import Operation, NetworkOperation from .network import Network @@ -190,17 +188,16 @@ def __call__(self, *operations): merge_set = iset() # Preseve given node order. for op in operations: if isinstance(op, NetworkOperation): - plan = op.net.compile() - merge_set.update(s for s in plan.steps - if isinstance(s, Operation)) + netop_nodes = nx.topological_sort(op.net.graph) + merge_set.update(s for s in netop_nodes if isinstance(s, Operation)) else: merge_set.add(op) operations = merge_set - provides = iset(chain(*[op.provides for op in operations])) + provides = iset(p for op in operations for p in op.provides) # Mark them all as optional, now that #18 calmly ignores # non-fully satisfied operations. - needs = iset(chain(*[optional(n) for op in operations for n in op.needs ])) - provides + needs = iset(optional(n) for op in operations for n in op.needs) - provides # Build network net = Network() From 3e06148230d7f53d96586bd31abc4fd372cfaac7 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 01:02:31 +0300 Subject: [PATCH 096/167] FIX(sideffect.TX): old TC was not with unsatisfied in mind, needed to give the sidefeect as input. 
--- test/test_graphkit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 4eca826a..f5f19622 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -517,7 +517,7 @@ def increment(box): provides=modifiers.sideffect('c'))(increment), ) - assert graph({'box': [0]})['box'] == [1, 2, 3] + assert graph({'box': [0], 'a': True})['box'] == [1, 2, 3] # Reverse order of functions. graph = compose('mygraph')( @@ -531,7 +531,7 @@ def increment(box): provides=[modifiers.sideffect('c')])(extend), ) - assert graph({'box': [0]})['box'] == [1, 1, 2] + assert graph({'box': [0], 'a': None})['box'] == [1, 1, 2] def test_optional_per_function_with_same_output(): From b515d507c4f26896dab5d715d41b30ad5c6d9392 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 8 Oct 2019 15:44:46 +0300 Subject: [PATCH 097/167] FEAT(TCs): +check DOCTESTs ... + feat: pytest-doctest configs in `setup.cfg`. + enh(travis): run doctests only in latest python 3.7. + Enabled & FIX doctests in: + in README + in docs/sources + in code --- .travis.yml | 13 +++-- README.md | 55 +++++++++---------- docs/source/graph_composition.rst | 87 ++++++++++++++++--------------- docs/source/operations.rst | 68 ++++++++++++------------ graphkit/base.py | 21 +++++--- graphkit/modifiers.py | 63 +++++++++++----------- graphkit/network.py | 6 +-- graphkit/plot.py | 20 ++++--- setup.cfg | 14 +++++ test/test_graphkit.py | 2 +- 10 files changed, 196 insertions(+), 153 deletions(-) diff --git a/.travis.yml b/.travis.yml index 025017a7..bb604dfa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,9 +21,16 @@ install: - cd .. script: - - pytest -v --cov=graphkit - # In case you adopt -m 'not slow' in setup.cfg. - #- pytest -vm slow --cov=graphkit + # OVERRIDE pytest-defaults adopted in `setup.cfg`: + # + # Run doctests in latest Python; certainly not < PY3.6 due to unstable dicts. 
+ # Also give `-m 'slow or not slow'` since `not slow` adopted in `setup.cfg`. + - | + if [[ "$TRAVIS_PYTHON_VERSION" = '3.7' ]]; then + pytest -v --cov=graphkit -m 'slow or not slow' + else + pytest -v --cov=graphkit test/ + fi deploy: provider: pypi diff --git a/README.md b/README.md index 81b1657d..2559dbf9 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,10 @@ ## Lightweight computation graphs for Python -GraphKit is a lightweight Python module for creating and running ordered graphs of computations, where the nodes of the graph correspond to computational operations, and the edges correspond to output --> input dependencies between those operations. Such graphs are useful in computer vision, machine learning, and many other domains. +GraphKit is a lightweight Python module for creating and running ordered graphs of computations, +where the nodes of the graph correspond to computational operations, and the edges +correspond to output --> input dependencies between those operations. +Such graphs are useful in computer vision, machine learning, and many other domains. ## Quick start @@ -23,36 +26,34 @@ program separately with your OS tools):: pip install graphkit[plot] -Here's a Python script with an example GraphKit computation graph that produces multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): +Here's a Python script with an example GraphKit computation graph that produces +multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): -``` -from operator import mul, sub -from graphkit import compose, operation - -# Computes |a|^p. -def abspow(a, p): - c = abs(a) ** p - return c +>>> from operator import mul, sub +>>> from graphkit import compose, operation -# Compose the mul, sub, and abspow operations into a computation graph. 
-graphop = compose(name="graphop")( - operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), - operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), - operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) -) +>>> # Computes |a|^p. +>>> def abspow(a, p): +... c = abs(a) ** p +... return c -# Run the graph and request all of the outputs. -out = graphop({'a': 2, 'b': 5}) +>>> # Compose the mul, sub, and abspow operations into a computation graph. +>>> graphop = compose(name="graphop")( +... operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), +... operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), +... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) +... ) -# Prints "{'a': 2, 'a_minus_ab': -8, 'b': 5, 'ab': 10, 'abs_a_minus_ab_cubed': 512}". -print(out) +>>> # Run the graph and request all of the outputs. +>>> out = graphop({'a': 2, 'b': 5}) +>>> print(out) +{'a': 2, 'b': 5, 'ab': 10, 'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512} -# Run the graph and request a subset of the outputs. -out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) +>>> # Run the graph and request a subset of the outputs. +>>> out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) +>>> print(out) +{'a_minus_ab': -8} -# Prints "{'a_minus_ab': -8}". -print(out) -``` As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! @@ -63,10 +64,10 @@ For debugging the above graph-operation you may plot it using these methods: ```python graphop.plot(show=True, solution=out) # open a matplotlib window with solution values in nodes - graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... + graphop.plot("intro.svg") # original graph; other formats: png, jpg, pdf, ... 
``` -![Intro graph](docs/source/images/intro.png "Intro graph") +![Intro graph](docs/source/images/intro.svg "Intro graph") ![Graphkit Legend](docs/source/images/GraphkitLegend.svg "Graphkit Legend") > **TIP:** The `pydot.Dot` instances returned by `plot()` are rendered as SVG in *Jupyter/IPython*. diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index 1d8e9f6d..a1f99756 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -21,20 +21,20 @@ Simple composition of operations The simplest use case for ``compose`` is assembling a collection of individual operations into a runnable computation graph. The example script from :ref:`quick-start` illustrates this well:: - from operator import mul, sub - from graphkit import compose, operation + >>> from operator import mul, sub + >>> from graphkit import compose, operation - # Computes |a|^p. - def abspow(a, p): - c = abs(a) ** p - return c + >>> # Computes |a|^p. + >>> def abspow(a, p): + ... c = abs(a) ** p + ... return c - # Compose the mul, sub, and abspow operations into a computation graph. - graphop = compose(name="graphop")( - operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), - operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), - operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) - ) + >>> # Compose the mul, sub, and abspow operations into a computation graph. + >>> graphop = compose(name="graphop")( + ... operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + ... operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), + ... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) + ... 
) The call here to ``compose()`` yields a runnable computation graph that looks like this (where the circles are operations, squares are data, and octagons are parameters): @@ -46,13 +46,15 @@ The call here to ``compose()`` yields a runnable computation graph that looks li Running a computation graph --------------------------- -The graph composed in the example above in :ref:`simple-graph-composition` can be run by simply calling it with a dictionary argument whose keys correspond to the names of inputs to the graph and whose values are the corresponding input values. For example, if ``graph`` is as defined above, we can run it like this:: +The graph composed in the example above in :ref:`simple-graph-composition` can be run +by simply calling it with a dictionary argument whose keys correspond to the names of inputs +to the graph and whose values are the corresponding input values. +For example, if ``graph`` is as defined above, we can run it like this:: # Run the graph and request all of the outputs. - out = graphop({'a': 2, 'b': 5}) - - # Prints "{'a': 2, 'a_minus_ab': -8, 'b': 5, 'ab': 10, 'abs_a_minus_ab_cubed': 512}". - print(out) + >>> out = graphop({'a': 2, 'b': 5}) + >>> out + {'a': 2, 'b': 5, 'ab': 10, 'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512} Producing a subset of outputs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -60,10 +62,9 @@ Producing a subset of outputs By default, calling a graph-operation on a set of inputs will yield all of that graph's outputs. You can use the ``outputs`` parameter to request only a subset. For example, if ``graphop`` is as above:: # Run the graph-operation and request a subset of the outputs. - out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) - - # Prints "{'a_minus_ab': -8}". 
- print(out) + >>> out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) + >>> out + {'a_minus_ab': -8} When using ``outputs`` to request only a subset of a graph's outputs, GraphKit executes only the ``operation`` nodes in the graph that are on a path from the inputs to the requested outputs. For example, the ``abspow1`` operation will not be executed here. @@ -73,10 +74,9 @@ Short-circuiting a graph computation You can short-circuit a graph computation, making certain inputs unnecessary, by providing a value in the graph that is further downstream in the graph than those inputs. For example, in the graph-operation we've been working with, you could provide the value of ``a_minus_ab`` to make the inputs ``a`` and ``b`` unnecessary:: # Run the graph-operation and request a subset of the outputs. - out = graphop({'a_minus_ab': -8}) - - # Prints "{'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512}". - print(out) + >>> out = graphop({'a_minus_ab': -8}) + >>> out + {'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512} When you do this, any ``operation`` nodes that are not on a path from the downstream input to the requested outputs (i.e. predecessors of the downstream input) are not computed. For example, the ``mul1`` and ``sub1`` operations are not executed here. @@ -87,16 +87,18 @@ Adding on to an existing computation graph Sometimes you will have an existing computation graph to which you want to add operations. This is simple, since ``compose`` can compose whole graphs along with individual ``operation`` instances. For example, if we have ``graph`` as above, we can add another operation to it to create a new graph:: - # Add another subtraction operation to the graph. - bigger_graph = compose(name="bigger_graph")( - graph, - operation(name="sub2", needs=["a_minus_ab", "c"], provides="a_minus_ab_minus_c")(sub) - ) + >>> # Add another subtraction operation to the graph. + >>> bigger_graph = compose(name="bigger_graph")( + ... graphop, + ... 
operation(name="sub2", needs=["a_minus_ab", "c"], provides="a_minus_ab_minus_c")(sub) + ... ) - # Run the graph and print the output. Prints "{'a_minus_ab_minus_c': -13}" - print(bigger_graph({'a': 2, 'b': 5, 'c': 5}, outputs=["a_minus_ab_minus_c"])) + >>> # Run the graph and print the output. + >>> sol = bigger_graph({'a': 2, 'b': 5, 'c': 5}, outputs=["a_minus_ab_minus_c"]) + >>> sol + {'a_minus_ab_minus_c': -13} -This yields a graph that looks like this: +This yields a graph which looks like this (see :ref:`Plotting`): .. image:: images/bigger_example_graph.svg @@ -111,15 +113,18 @@ Sometimes you will have two computation graphs---perhaps ones that share operati However, if you want to combine graphs that share operations and don't want to pay the price of running redundant computations, you can set the ``merge`` parameter of ``compose()`` to ``True``. This will consolidate redundant ``operation`` nodes (based on ``name``) into a single node. For example, let's say we have ``graphop``, as in the examples above, along with this graph:: - # This graph shares the "mul1" operation with graph. - another_graph = compose(name="another_graph")( - operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), - operation(name="mul2", needs=["c", "ab"], provides=["cab"])(mul) - ) + >>> # This graph shares the "mul1" operation with graph. + >>> another_graph = compose(name="another_graph")( + ... operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + ... operation(name="mul2", needs=["c", "ab"], provides=["cab"])(mul) + ... 
) We can merge ``graphop`` and ``another_graph`` like so, avoiding a redundant ``mul1`` operation:: - merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) + >>> merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) + >>> print(merged_graph) + NetworkOperation(name='merged_graph', needs=IndexedSet(['a', 'b', 'c']), + provides=IndexedSet(['ab', 'a_minus_ab', 'abs_a_minus_ab_cubed', 'cab'])) This ``merged_graph`` will look like this: @@ -127,5 +132,5 @@ This ``merged_graph`` will look like this: As always, we can run computations with this graph by simply calling it:: - # Prints "{'cab': 50}". - print(merged_graph({'a': 2, 'b': 5, 'c': 5}, outputs=["cab"])) + >>> merged_graph({'a': 2, 'b': 5, 'c': 5}, outputs=["cab"]) + {'cab': 50} diff --git a/docs/source/operations.rst b/docs/source/operations.rst index cd9afb39..0376a7a1 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -20,13 +20,13 @@ Operations are just functions At the heart of each ``operation`` is just a function, any arbitrary function. Indeed, you can instantiate an ``operation`` with a function and then call it just like the original function, e.g.:: - from operator import add - from graphkit import operation + >>> from operator import add + >>> from graphkit import operation - add_op = operation(name='add_op', needs=['a', 'b'], provides=['a_plus_b'])(add) + >>> add_op = operation(name='add_op', needs=['a', 'b'], provides=['a_plus_b'])(add) - # Passes! 
- assert add_op(3, 4) == add(3, 4) + >>> add_op(3, 4) == add(3, 4) + True Specifying graph structure: ``provides`` and ``needs`` @@ -42,20 +42,20 @@ When many operations are composed into a computation graph (see :ref:`graph-comp Let's look again at the operations from the script in :ref:`quick-start`, for example:: - from operator import mul, sub - from graphkit import compose, operation + >>> from operator import mul, sub + >>> from graphkit import compose, operation - # Computes |a|^p. - def abspow(a, p): - c = abs(a) ** p - return c + >>> # Computes |a|^p. + >>> def abspow(a, p): + ... c = abs(a) ** p + ... return c - # Compose the mul, sub, and abspow operations into a computation graph. - graphop = compose(name="graphop")( - operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), - operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), - operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) - ) + >>> # Compose the mul, sub, and abspow operations into a computation graph. + >>> graphop = compose(name="graphop")( + ... operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + ... operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), + ... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) + ... ) The ``needs`` and ``provides`` arguments to the operations in this script define a computation graph that looks like this (where the oval are operations, squares/houses are data): @@ -80,38 +80,38 @@ Decorator specification If you are defining your computation graph and the functions that comprise it all in the same script, the decorator specification of ``operation`` instances might be particularly useful, as it allows you to assign computation graph structure to functions as they are defined. 
Here's an example:: - from graphkit import operation, compose + >>> from graphkit import operation, compose - @operation(name='foo_op', needs=['a', 'b', 'c'], provides='foo') - def foo(a, b, c): - return c * (a + b) + >>> @operation(name='foo_op', needs=['a', 'b', 'c'], provides='foo') + ... def foo(a, b, c): + ... return c * (a + b) - graphop = compose(name='foo_graph')(foo) + >>> graphop = compose(name='foo_graph')(foo) Functional specification ^^^^^^^^^^^^^^^^^^^^^^^^ If the functions underlying your computation graph operations are defined elsewhere than the script in which your graph itself is defined (e.g. they are defined in another module, or they are system functions), you can use the functional specification of ``operation`` instances:: - from operator import add, mul - from graphkit import operation, compose + >>> from operator import add, mul + >>> from graphkit import operation, compose - add_op = operation(name='add_op', needs=['a', 'b'], provides='sum')(add) - mul_op = operation(name='mul_op', needs=['c', 'sum'], provides='product')(mul) + >>> add_op = operation(name='add_op', needs=['a', 'b'], provides='sum')(add) + >>> mul_op = operation(name='mul_op', needs=['c', 'sum'], provides='product')(mul) - graphop = compose(name='add_mul_graph')(add_op, mul_op) + >>> graphop = compose(name='add_mul_graph')(add_op, mul_op) The functional specification is also useful if you want to create multiple ``operation`` instances from the same function, perhaps with different parameter values, e.g.:: - from graphkit import operation, compose + >>> from graphkit import operation, compose - def mypow(a, p=2): - return a ** p + >>> def mypow(a, p=2): + ... 
return a ** p - pow_op1 = operation(name='pow_op1', needs=['a'], provides='a_squared')(mypow) - pow_op2 = operation(name='pow_op2', needs=['a'], params={'p': 3}, provides='a_cubed')(mypow) + >>> pow_op1 = operation(name='pow_op1', needs=['a'], provides='a_squared')(mypow) + >>> pow_op2 = operation(name='pow_op2', needs=['a'], params={'p': 3}, provides='a_cubed')(mypow) - graphop = compose(name='two_pows_graph')(pow_op1, pow_op2) + >>> graphop = compose(name='two_pows_graph')(pow_op1, pow_op2) A slightly different approach can be used here to accomplish the same effect by creating an operation "factory":: @@ -134,4 +134,4 @@ Modifiers on ``operation`` inputs and outputs Certain modifiers are available to apply to input or output values in ``needs`` and ``provides``, for example to designate an optional input. These modifiers are available in the ``graphkit.modifiers`` module: .. autoclass:: graphkit.modifiers.optional -.. autoclass:: graphkit.modifiers.token +.. autoclass:: graphkit.modifiers.sideffect diff --git a/graphkit/base.py b/graphkit/base.py index 5b3367bd..15652cfe 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -44,17 +44,21 @@ def __init__(self, **kwargs): important when connecting layers and data in a Network object, as the names are used to construct the graph. - :param str name: The name the operation (e.g. conv1, conv2, etc..) + :param str name: + The name the operation (e.g. conv1, conv2, etc..) - :param list needs: Names of input data objects this layer requires. + :param list needs: + Names of input data objects this layer requires. - :param list provides: Names of output data objects this provides. + :param list provides: + Names of output data objects this provides. - :param dict params: A dict of key/value pairs representing parameters - associated with your operation. These values will be - accessible using the ``.params`` attribute of your object. - NOTE: It's important that any values stored in this - argument must be pickelable. 
+ :param dict params: + A dict of key/value pairs representing parameters + associated with your operation. These values will be + accessible using the ``.params`` attribute of your object. + NOTE: It's important that any values stored in this + argument must be pickelable. """ # (Optional) names for this layer, and the data it needs and provides @@ -85,6 +89,7 @@ def compute(self, inputs): """ This method must be implemented to perform this layer's feed-forward computation on a given set of inputs. + :param list inputs: A list of :class:`Data` objects on which to run the layer's feed-forward computation. diff --git a/graphkit/modifiers.py b/graphkit/modifiers.py index 094636fd..18c75fec 100644 --- a/graphkit/modifiers.py +++ b/graphkit/modifiers.py @@ -20,21 +20,23 @@ class optional(str): Here is an example of an operation that uses an optional argument:: - from graphkit import operation, compose - from graphkit.modifiers import optional + >>> from graphkit import operation, compose + >>> from graphkit.modifiers import optional - # Function that adds either two or three numbers. - def myadd(a, b, c=0): - return a + b + c + >>> # Function that adds either two or three numbers. + >>> def myadd(a, b, c=0): + ... return a + b + c - # Designate c as an optional argument. - graph = compose('mygraph')( - operation(name='myadd', needs=['a', 'b', optional('c')], provides='sum')(myadd) - ) + >>> # Designate c as an optional argument. + >>> graph = compose('mygraph')( + ... operation(name='myadd', needs=['a', 'b', optional('c')], provides='sum')(myadd) + ... ) - # The graph works with and without 'c' provided as input. - assert graph({'a': 5, 'b': 2, 'c': 4})['sum'] == 11 - assert graph({'a': 5, 'b': 2})['sum'] == 7 + >>> # The graph works with and without 'c' provided as input. 
+ >>> graph({'a': 5, 'b': 2, 'c': 4})['sum'] + 11 + >>> graph({'a': 5, 'b': 2}) + {'a': 5, 'b': 2, 'sum': 7} """ @@ -59,24 +61,25 @@ class sideffect(str): A typical use case is to signify columns required to produce new ones in pandas dataframes:: - from graphkit import operation, compose - from graphkit.modifiers import sideffect - - # Function appending a new dataframe column from two pre-existing ones. - def addcolumns(df): - df['sum'] = df['a'] + df['b'] - - # Designate `a`, `b` & `sum` column names as an sideffect arguments. - graph = compose('mygraph')( - operation( - name='addcolumns', - needs=['df', sideffect('a'), sideffect('b')], - provides=[sideffect('sum')])(addcolumns) - ) - - # The graph works with and without 'c' provided as input. - df = pd.DataFrame({'a': [5], 'b': [2]}) - assert graph({'df': df})['sum'] == 11 + >>> from graphkit import operation, compose + >>> from graphkit.modifiers import sideffect + + >>> # Function appending a new dataframe column from two pre-existing ones. + >>> def addcolumns(df): + ... df['sum'] = df['a'] + df['b'] + + >>> # Designate `a`, `b` & `sum` column names as an sideffect arguments. + >>> graph = compose('mygraph')( + ... operation( + ... name='addcolumns', + ... needs=['df', sideffect('a'), sideffect('b')], + ... provides=[sideffect('sum')])(addcolumns) + ... ) + + >>> # The graph works with and without 'c' provided as input. + >>> df = pd.DataFrame({'a': [5], 'b': [2]}) # doctest: +SKIP + >>> graph({'df': df})['sum'] == 11 # doctest: +SKIP + True """ diff --git a/graphkit/network.py b/graphkit/network.py index 2a80437a..706b87c3 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -85,12 +85,12 @@ if sys.version_info < (3, 6): """ Consistently ordered variant of :class:`~networkx.DiGraph`. - + PY3.6 has inmsertion-order dicts, but PY3.5 has not. And behvavior *and TCs) in these environments may fail spuriously! Still *subgraphs* may not patch! 
- Fix from: + Fix from: https://networkx.github.io/documentation/latest/reference/classes/ordered.html#module-networkx.classes.ordered """ from networkx import OrderedDiGraph as DiGraph @@ -182,7 +182,7 @@ def add_op(self, operation): # functionalOperations don't have that set. if not operation.net: operation.net = self - + # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: if isinstance(n, optional): diff --git a/graphkit/plot.py b/graphkit/plot.py index 69138d06..19b1cdeb 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -1,6 +1,6 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. - +""" Plotting graphkit graps""" import io import logging import os @@ -91,8 +91,8 @@ def plot(self, filename=None, show=False, **kws): *ARROWS* solid black arrows - dependencies (source-data are``need``-ed by target-operations, - sources-operations ``provide`` target-data) + dependencies (source-data *need*-ed by target-operations, + sources-operations *provides* target-data) dashed black arrows optional needs wheat arrows @@ -104,6 +104,7 @@ def plot(self, filename=None, show=False, **kws): >>> from graphkit import compose, operation >>> from graphkit.modifiers import optional + >>> from operator import add >>> graphop = compose(name="graphop")( ... operation(name="add", needs=["a", "b1"], provides=["ab1"])(add), @@ -111,12 +112,19 @@ def plot(self, filename=None, show=False, **kws): ... operation(name="abb", needs=["ab1", "ab2"], provides=["asked"])(add), ... 
) - >>> graphop.plot(show=True); # plot just the graph in a matplotlib window + >>> graphop.plot(show=True); # plot just the graph in a matplotlib window # doctest: +SKIP >>> inputs = {'a': 1, 'b1': 2} >>> solution = graphop(inputs) # now plots will include the execution-plan - >>> graphop.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); - >>> graphop.plot(solution=solution) # just get the `pydoit.Dot` object, renderable in Jupyter + >>> graphop.plot('plot1.svg', inputs=inputs, outputs=['asked', 'b1'], solution=solution); # doctest: +SKIP + >>> dot = graphop.plot(solution=solution); # just get the `pydoit.Dot` object, renderable in Jupyter + >>> print(dot) + digraph G { + fontname=italic; + label=graphop; + a [fillcolor=wheat, shape=invhouse, style=filled]; + ... + ... """ dot = self._build_pydot(**kws) return render_pydot(dot, filename=filename, show=show) diff --git a/setup.cfg b/setup.cfg index 2e5ce9fc..c9641347 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,3 +8,17 @@ universal = 1 # See http://doc.pytest.org/en/latest/mark.html#mark markers = slow: marks tests as slow, select them with `-m slow` or `-m 'not slow'` + +# TODO: enable doctests in README.md. 
+addopts = --ignore setup.py
+    # facilitate developer
+    -m 'not slow'
+    --doctest-report ndiff
+    --doctest-continue-on-failure
+    # --doctest-ignore-import-errors
+    --doctest-modules
+    --doctest-glob=*.md
+    --doctest-glob=*.rst
+    --cov-fail-under=80
+doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS
+
diff --git a/test/test_graphkit.py b/test/test_graphkit.py
index f5f19622..45ddbaba 100644
--- a/test/test_graphkit.py
+++ b/test/test_graphkit.py
@@ -28,7 +28,7 @@ def filtdict(d, *keys):
     Keep dict items with the given keys

     >>> filtdict({"a": 1, "b": 2}, "b")
-    {"b": 2}
+    {'b': 2}
     """
     return type(d)(i for i in d.items() if i[0] in keys)

From 34543c96a08f8e388efe3e905ac7a4a3362ed94f Mon Sep 17 00:00:00 2001
From: Kostis Anagnostopoulos
Date: Wed, 9 Oct 2019 10:38:52 +0300
Subject: [PATCH 098/167] fix(TCs): enable assertions in compatibility TCs

---
 test/test_graphkit.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/test/test_graphkit.py b/test/test_graphkit.py
index 45ddbaba..0856f3ca 100644
--- a/test/test_graphkit.py
+++ b/test/test_graphkit.py
@@ -831,10 +831,20 @@ def test_backwards_compatibility():
     #

     # get all outputs
-    pprint(net.compute(outputs=None, named_inputs={'a': 1, 'b': 2}))
+    exp = {'a': 1,
+           'b': 2,
+           'p1_plus_p2': 12.0,
+           'sum_ab': 3,
+           'sum_ab_p1': 3.0,
+           'sum_ab_p2': 9.0,
+           'sum_ab_p3': 27.0,
+           'sum_ab_times_b': 6}
+    assert net.compute(outputs=None, named_inputs={'a': 1, 'b': 2}) == exp

     # get specific outputs
-    pprint(net.compute(outputs=["sum_ab_times_b"], named_inputs={'a': 1, 'b': 2}))
+    exp = {'sum_ab_times_b': 6}
+    assert net.compute(outputs=["sum_ab_times_b"], named_inputs={'a': 1, 'b': 2}) == exp

     # start with inputs already computed
-    pprint(net.compute(outputs=["sum_ab_times_b"], named_inputs={"sum_ab": 1, "b": 2}))
+    exp = {'sum_ab_times_b': 2}
+    assert net.compute(outputs=["sum_ab_times_b"], named_inputs={"sum_ab": 1, "b": 2}) == exp

From c104a1745eec9f375df8a0a0d75dbcb912ef79ef Mon Sep 17 00:00:00
2001
From: Kostis Anagnostopoulos
Date: Wed, 9 Oct 2019 12:30:55 +0300
Subject: [PATCH 099/167] FIX(func, TC): operation __repr__ fails with partial
 args...

hindering debugging sessions.

+ fix: use getattr in both Operation & operation().
+ enh: +helpful err-msg when forgetting to set func on Operation.
+ test: +TC to check Operation __repr__ in new test-file.
---
 graphkit/base.py        |  8 ++++----
 graphkit/functional.py  | 10 ++++++----
 test/test_functional.py | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 8 deletions(-)
 create mode 100644 test/test_functional.py

diff --git a/graphkit/base.py b/graphkit/base.py
index 15652cfe..89d759f9 100644
--- a/graphkit/base.py
+++ b/graphkit/base.py
@@ -99,7 +99,7 @@ def compute(self, inputs):
         ``inputs``.
         """

-        raise NotImplementedError
+        raise NotImplementedError("Define callable of %r!" % self)

     def _compute(self, named_inputs, outputs=None):
         inputs = [named_inputs[d] for d in self.needs]
@@ -155,9 +155,9 @@ def __repr__(self):
         """
         return u"%s(name='%s', needs=%s, provides=%s)" % \
             (self.__class__.__name__,
-             self.name,
-             self.needs,
-             self.provides)
+             getattr(self, "name", None),
+             getattr(self, "needs", None),
+             getattr(self, "provides", None))


 class NetworkOperation(Operation, plot.Plotter):
diff --git a/graphkit/functional.py b/graphkit/functional.py
index 5699f410..ecc47ce2 100644
--- a/graphkit/functional.py
+++ b/graphkit/functional.py
@@ -154,12 +154,14 @@ def __repr__(self):
         """
         Display more informative names for the Operation class
         """
+        func_name = getattr(self, "fn")
+        func_name = func_name and getattr(func_name, "__name__", None)
         return u"%s(name='%s', needs=%s, provides=%s, fn=%s)" % \
             (self.__class__.__name__,
-             self.name,
-             self.needs,
-             self.provides,
-             self.fn.__name__)
+             getattr(self, "name", None),
+             getattr(self, "needs", None),
+             getattr(self, "provides", None),
+             func_name)
diff --git a/test/test_functional.py b/test/test_functional.py
new file mode 100644
index
00000000..c81dc44b --- /dev/null +++ b/test/test_functional.py @@ -0,0 +1,34 @@ +# Copyright 2016, Yahoo Inc. +# Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. + +import pytest + +from graphkit import Operation, operation + + +@pytest.fixture(params=[None, ['some']]) +def opname(request): + return request.param + + +@pytest.fixture(params=[None, ['some']]) +def opneeds(request): + return request.param + + +@pytest.fixture(params=[None, ['some']]) +def opprovides(request): + return request.param + + +def test_operation_repr(opname, opneeds, opprovides): + # Simply check __repr__() does not crash on partial attributes. + + kw = locals().copy() + kw = {name[2:]: arg for name, arg in kw.items()} + + op = operation(**kw) + str(op) + + op = Operation(**kw) + str(op) From 63432f300a92cf1b03e9b7418598b6ea9f9eb2e1 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 12:31:30 +0300 Subject: [PATCH 100/167] enh: make repr(ExecPlan) sorter --- graphkit/network.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 706b87c3..972753e5 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -530,9 +530,10 @@ def _build_pydot(self, **kws): return build_pydot(**mykws) def __repr__(self): - return ( - "ExecutionPlan:\n +--inputs:%s, \n +--outputs=%s\n +--steps=%s)" - % (self.inputs, self.outputs, self.steps)) + steps = ["\n +--%s" % s for s in self.steps] + return ( + "ExecutionPlan(inputs=%s, outputs=%s, steps:%s)" + % (self.inputs, self.outputs, ''.join(steps))) def get_data_node(self, name): """ From 1ac3ac2c9a266489cfe366fc19db83786a318224 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 12:34:42 +0300 Subject: [PATCH 101/167] ENH(net): annotate OP-EXCEPTIONS with ExecPlan... to aid debug sessions, inspired by @syamajala's 309338340. 
+ enh: centralize calling of callables from both parallel/serial.
+ DOC: +new section in "compose" with doctests exemplifying errors.
---
 docs/source/graph_composition.rst | 24 ++++++++++++++++++++++++
 graphkit/network.py               | 14 +++++++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst
index a1f99756..46a77bfc 100644
--- a/docs/source/graph_composition.rst
+++ b/docs/source/graph_composition.rst
@@ -134,3 +134,27 @@ As always, we can run computations with this graph by simply calling it::

     >>> merged_graph({'a': 2, 'b': 5, 'c': 5}, outputs=["cab"])
     {'cab': 50}
+
+
+Errors
+------
+
+If an operation fails, its exception gets annotated with the following properties
+as a debug aid::
+
+>>> def scream(*args):
+...     raise ValueError("Wrong!")
+
+>>> try:
+...     compose("errgraph")(
+...         operation(name="screamer", needs=['a'], provides=["foo"])(scream)
+...     )({'a': None})
+... except ValueError as ex:
+...     print(ex.execution_node)
+...     print(ex.execution_plan)
+FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])
+ExecutionPlan(inputs=('a',), outputs=(), steps:
+ +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo']))
+
+Of course from the :class:`ExecutionPlan` you can explore its ``dag`` property
+or the ``net`` that compiled it.
\ No newline at end of file diff --git a/graphkit/network.py b/graphkit/network.py index 972753e5..10f5957d 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -582,6 +582,14 @@ def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): overwrites[value_name] = solution[value_name] solution[value_name] = inputs[value_name] + def _call_operation(self, op, solution): + try: + return op._compute(solution) + except Exception as ex: + ex.execution_node = op + ex.execution_plan = self + raise + def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, thread_pool_size=10 ): @@ -636,8 +644,8 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, break done_iterator = pool.imap_unordered( - lambda op: (op,op._compute(solution)), - upnext) + (lambda op: (op, self._call_operation(op, solution))), upnext) + for op, result in done_iterator: solution.update(result) self.executed.add(op) @@ -658,7 +666,7 @@ def _execute_sequential_method(self, inputs, solution, overwrites): t0 = time.time() # compute layer outputs - layer_outputs = step._compute(solution) + layer_outputs = self._call_operation(step, solution) # add outputs to solution solution.update(layer_outputs) From 9d6bb929c5d85036f2690ebad7db337124fe36d9 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 12:57:03 +0300 Subject: [PATCH 102/167] enh(travis): pytest -v implied(~) when many test-files --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index bb604dfa..03d5ceb6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,9 +27,9 @@ script: # Also give `-m 'slow or not slow'` since `not slow` adopted in `setup.cfg`. 
- | if [[ "$TRAVIS_PYTHON_VERSION" = '3.7' ]]; then - pytest -v --cov=graphkit -m 'slow or not slow' + pytest --cov=graphkit -m 'slow or not slow' else - pytest -v --cov=graphkit test/ + pytest --cov=graphkit test/ fi deploy: From 26ea7bfafa3a78a96de6c55fb666660d8463016e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 13:24:41 +0300 Subject: [PATCH 103/167] DOC: sphinx fixes and +INTERNALS section + Sphinx builds CLEAN. --- README.md | 9 ++++++--- docs/source/graph_composition.rst | 12 +++++++++--- docs/source/index.rst | 15 ++++++++++----- graphkit/functional.py | 2 +- graphkit/network.py | 15 ++++++++------- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 2559dbf9..22d933a7 100644 --- a/README.md +++ b/README.md @@ -60,11 +60,14 @@ As you can see, any function can be used as an operation in GraphKit, even ones ## Plotting -For debugging the above graph-operation you may plot it using these methods: +For debugging the above graph-operation you may plot the *execution plan* +of the last computation it using these methods:: ```python - graphop.plot(show=True, solution=out) # open a matplotlib window with solution values in nodes - graphop.plot("intro.svg") # original graph; other formats: png, jpg, pdf, ... + graphop.plot(show=True) # open a matplotlib window + graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... 
+ graphop.plot() # without arguments return a pydot.DOT object + graphop.plot(solution=out) # annotate graph with solution values ``` ![Intro graph](docs/source/images/intro.svg "Intro graph") diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index 46a77bfc..ebb85f65 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -98,7 +98,7 @@ Sometimes you will have an existing computation graph to which you want to add o >>> sol {'a_minus_ab_minus_c': -13} -This yields a graph which looks like this (see :ref:`Plotting`): +This yields a graph which looks like this (see :ref:`plotting`): .. image:: images/bigger_example_graph.svg @@ -136,11 +136,12 @@ As always, we can run computations with this graph by simply calling it:: {'cab': 50} + Errors ------ If an operation fails, its exception gets annotated with the folllowing properties -as a debug aid:: +as a debug aid: >>> def scream(*args): ... raise ValueError("Wrong!") @@ -157,4 +158,9 @@ ExecutionPlan(inputs=('a',), outputs=(), steps: +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])) Of course from the :class:`ExecutionPlan` you can explore its ``dag`` property -or the ``net`` that compiled it. \ No newline at end of file +or the ``net`` that compiled it. + +Execution internals +------------------- +.. 
automodule:: graphkit.network + diff --git a/docs/source/index.rst b/docs/source/index.rst index f542da58..51bcb006 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -38,7 +38,7 @@ Here's how to install:: pip install graphkit -OR with dependencies for plotting support (and you need to install `Graphviz +OR with dependencies for plotting support (and you need to install `Graphviz `_ program separately with your OS tools):: pip install graphkit[plot] @@ -76,13 +76,18 @@ Here's a Python script with an example GraphKit computation graph that produces As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! +.. _plotting: + Plotting -------- -For debugging the above graph-operation you may plot it using these methods:: +For debugging the above graph-operation you may plot the *execution plan* +of the last computation it using these methods:: - graphop.plot(show=True, solution=out) # open a matplotlib window with solution values in nodes + graphop.plot(show=True) # open a matplotlib window graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... + graphop.plot() # without arguments return a pydot.DOT object + graphop.plot(solution=out) # annotate graph with solution values .. image:: images/intro.svg :alt: Intro graph @@ -93,8 +98,8 @@ For debugging the above graph-operation you may plot it using these methods:: The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. .. Tip:: - The ``pydot.Dot`` instances returned by ``plot()`` are rendered - directly in *Jupyter/IPython* notebooks as SVG images. + The `pydot.Dot `_ instances returned by ``plot()`` + are rendered directly in *Jupyter/IPython* notebooks as SVG images. .. 
NOTE:: For plots, `Graphviz `_ program must be in your PATH, diff --git a/graphkit/functional.py b/graphkit/functional.py index ecc47ce2..9c0348c7 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -86,7 +86,7 @@ class operation(Operation): :param dict params: A dict of key/value pairs representing constant parameters associated with your operation. These can correspond to either - ``args`` or ``kwargs`` of ``fn`. + ``args`` or ``kwargs`` of ``fn``. """ def __init__(self, fn=None, **kwargs): diff --git a/graphkit/network.py b/graphkit/network.py index 10f5957d..e4957d59 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -1,16 +1,17 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. -"""" -The main implementation of the network of operations & data to compute. +""" +Network-based computation of operations & data. -The execution of network *operations* (aka computation) is splitted -in 2 phases: +The execution of network *operations* is splitted in 2 phases: -- COMPILE: prune unsatisfied nodes, sort dag topologically & solve it, and - derive the *execution steps* (see below) based on the given *inputs* +COMPILE: + prune unsatisfied nodes, sort dag topologically & solve it, and + derive the *execution steps* (see below) based on the given *inputs* and asked *outputs*. -- EXECUTE: sequential or parallel invocation of the underlying functions +EXECUTE: + sequential or parallel invocation of the underlying functions of the operations with arguments from the ``solution``. 
Computations are based on 5 data-structures: From 6c7a3bf962a43627b409d5b34085c879570f4cb2 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 13:36:23 +0300 Subject: [PATCH 104/167] fix(build): +pytest-sphinx plugin --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index bf91ab44..93c2b835 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ test_reqs = plot_reqs + [ "pytest", "pytest-cov", + "pytest-sphinx", ] setup( From fb6abf7bb080687457f2f66bcb6eed1ab9253374 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 14:28:18 +0300 Subject: [PATCH 105/167] enh(build): set README as PyPi landing page --- setup.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/setup.py b/setup.py index 93c2b835..47a0fdfc 100644 --- a/setup.py +++ b/setup.py @@ -1,18 +1,14 @@ #!/usr/bin/env python # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. +import io import os import re -import io from setuptools import setup -LONG_DESCRIPTION = """ -GraphKit is a lightweight Python module for creating and running ordered graphs -of computations, where the nodes of the graph correspond to computational -operations, and the edges correspond to output --> input dependencies between -those operations. Such graphs are useful in computer vision, machine learning, -and many other domains. 
-""" + +with open("README.md") as f: + long_description = f.read() # Grab the version using convention described by flask # https://github.com/pallets/flask/blob/master/setup.py#L10 @@ -20,20 +16,20 @@ version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) plot_reqs = [ - "matplotlib", # to test plot - "pydot", # to test plot + "matplotlib", # to test plot + "pydot", # to test plot ] test_reqs = plot_reqs + [ - "pytest", - "pytest-cov", - "pytest-sphinx", + "pytest", + "pytest-cov", + "pytest-sphinx", ] setup( name='graphkit', version=version, description='Lightweight computation graphs for Python', - long_description=LONG_DESCRIPTION, + long_description=long_description, author='Huy Nguyen, Arel Cordero, Pierre Garrigues, Rob Hess, Tobi Baumgartner, Clayton Mellina', author_email='huyng@yahoo-inc.com', url='http://github.com/yahoo/graphkit', From e6cd195aa0946218cc4f6e90caea2bf9e7d0ab0c Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 14:44:34 +0300 Subject: [PATCH 106/167] drop(site): rtd_theme is the default for sphinx --- .travis.yml | 2 +- docs/source/conf.py | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 03d5ceb6..eccdd493 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ addons: install: - - pip install Sphinx sphinx_rtd_theme codecov packaging + - pip install Sphinx codecov packaging - "python -c $'import os, packaging.version as version\\nv = version.parse(os.environ.get(\"TRAVIS_TAG\", \"1.0\")).public\\nwith open(\"VERSION\", \"w\") as f: f.write(v)'" - pip install -e .[test] - cd docs diff --git a/docs/source/conf.py b/docs/source/conf.py index a92cff6f..54d2c6b8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,7 +17,6 @@ import sys import os -import sphinx_rtd_theme import packaging.version # If extensions (or modules to document with autodoc) are in another directory, @@ -109,11 +108,6 @@ # -- Options for HTML output 
---------------------------------------------- -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. From a4aed58388a4228c6b525ac81ad537ac5d06e75d Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 14:45:29 +0300 Subject: [PATCH 107/167] doc(changes): +issues in v1.2.4 (old) release by yahoo --- CHANGES.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 CHANGES.rst diff --git a/CHANGES.rst b/CHANGES.rst new file mode 100644 index 00000000..6c3bf45e --- /dev/null +++ b/CHANGES.rst @@ -0,0 +1,12 @@ + ================ + Graphkit Changes + ================ + + +v1.2.4: Mar 2018 +================ +First public release + +- Issues in pruning algorithm: #24, #25 +- Blocking bug in plotting code for Python-3.x. +- Test-cases without assertions (just prints). \ No newline at end of file From 35b365b6cadc73af2fa02e503937f30658166998 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 18:13:10 +0300 Subject: [PATCH 108/167] feat(site): include CHANGES sections --- CHANGES.rst | 2 +- docs/source/changes.rst | 1 + docs/source/index.rst | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 docs/source/changes.rst diff --git a/CHANGES.rst b/CHANGES.rst index 6c3bf45e..599c6c53 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,7 @@ v1.2.4: Mar 2018 ================ -First public release +First public release. - Issues in pruning algorithm: #24, #25 - Blocking bug in plotting code for Python-3.x. diff --git a/docs/source/changes.rst b/docs/source/changes.rst new file mode 100644 index 00000000..d76c92b6 --- /dev/null +++ b/docs/source/changes.rst @@ -0,0 +1 @@ +.. 
include:: ../../CHANGES.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 51bcb006..949c3cc5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -22,6 +22,7 @@ GraphKit operations graph_composition + changes Lightweight computation graphs for Python From 6b841227fba60aed22f0e4d4057bcea2b4dec10e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 18:18:45 +0300 Subject: [PATCH 109/167] doc(changes): +FLOWCHART dot-file for v1.2.4 --- CHANGES.rst | 10 ++- docs/GraphkitFlowchart.dot | 18 +++++ docs/source/changes.rst | 5 ++ docs/source/images/GraphkitFlowchart.svg | 80 +++++++++++++++++++++ docs/source/images/GraphkitLegend.svg | 92 ++++++++++++------------ 5 files changed, 158 insertions(+), 47 deletions(-) create mode 100644 docs/GraphkitFlowchart.dot create mode 100644 docs/source/images/GraphkitFlowchart.svg diff --git a/CHANGES.rst b/CHANGES.rst index 599c6c53..fcabc536 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -9,4 +9,12 @@ First public release. - Issues in pruning algorithm: #24, #25 - Blocking bug in plotting code for Python-3.x. -- Test-cases without assertions (just prints). \ No newline at end of file +- Test-cases without assertions (just prints). + +|v124-flowchart| + + +.. _substitutions: + +.. 
|v124-flowchart| image:: docs/source/images/GraphkitFlowchart.svg + :alt: graphkit-v1.2.4 flowchart diff --git a/docs/GraphkitFlowchart.dot b/docs/GraphkitFlowchart.dot new file mode 100644 index 00000000..3f5d61f9 --- /dev/null +++ b/docs/GraphkitFlowchart.dot @@ -0,0 +1,18 @@ +# Render it manually with this command, and remember to update result in git: +# +# dot docs/GraphkitFlowchart.dot -Tsvg -odocs/source/images/GraphkitFlowchart.svg +# +digraph { + label="graphkit-v1.2.4 flowchart"; + labelloc=t; + + operations [shape=parallelogram fontname="italic"]; + compose [label="compose & compile" fontname="italic"]; + network [shape=parallelogram fontname="italic"]; + data [shape=rect label="inputs & outputs"]; + compute [fontname=italic fontname="italic"]; + solution [shape=rect]; + + operations -> compose -> network [arrowhead=vee]; + {data network} -> compute -> solution [arrowhead=vee]; +} \ No newline at end of file diff --git a/docs/source/changes.rst b/docs/source/changes.rst index d76c92b6..96330d66 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -1 +1,6 @@ .. include:: ../../CHANGES.rst + :end-before: .. _substitutions: + + +.. 
|v124-flowchart| image:: images/GraphkitFlowchart.svg + :alt: graphkit-v1.2.4 flowchart diff --git a/docs/source/images/GraphkitFlowchart.svg b/docs/source/images/GraphkitFlowchart.svg new file mode 100644 index 00000000..2b22a63d --- /dev/null +++ b/docs/source/images/GraphkitFlowchart.svg @@ -0,0 +1,80 @@ + + + + + + +%3 + +graphkit-v1.2.4 flowchart + + +operations + +operations + + + +compose + +compose & compile + + + +operations->compose + + + + + +network + +network + + + +compose->network + + + + + +compute + +compute + + + +network->compute + + + + + +data + +inputs & outputs + + + +data->compute + + + + + +solution + +solution + + + +compute->solution + + + + + diff --git a/docs/source/images/GraphkitLegend.svg b/docs/source/images/GraphkitLegend.svg index 798ba3a5..6b5f3f93 100644 --- a/docs/source/images/GraphkitLegend.svg +++ b/docs/source/images/GraphkitLegend.svg @@ -4,147 +4,147 @@ - + G - + cluster_legend - -Graphkit Legend + +Graphkit Legend operation - -operation + +operation graphop - -graph operation + +graph operation insteps - -execution step + +execution step executed - -executed + +executed data - -data + +data input - -input + +input output - -output + +output inp_out - -inp+out + +inp+out evicted - -evicted + +evicted pinned - -pinned + +pinned evpin - -evict+pin + +evict+pin sol - -in solution + +in solution e2 - -dependency + +dependency e1->e2 - - + + e3 - -optional + +optional e2->e3 - - + + e4 - -pruned dependency + +pruned dependency e3->e4 - - + + e5 - -execution sequence + +execution sequence e4->e5 - - -1 + + +1 From ed1d5006c3959891f6fe599009f759e3589c6c9d Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 18:58:18 +0300 Subject: [PATCH 110/167] doc(changes): +FLOWCHART-v1.3.0 --- CHANGES.rst | 4 +- docs/GraphkitFlowchart.dot | 25 ++- docs/source/changes.rst | 5 +- ...chart.svg => GraphkitFlowchart-v1.2.4.svg} | 0 .../images/GraphkitFlowchart-v1.3.0.svg | 145 ++++++++++++++++++ 5 files changed, 170 
insertions(+), 9 deletions(-) rename docs/source/images/{GraphkitFlowchart.svg => GraphkitFlowchart-v1.2.4.svg} (100%) create mode 100644 docs/source/images/GraphkitFlowchart-v1.3.0.svg diff --git a/CHANGES.rst b/CHANGES.rst index fcabc536..afd0b0e1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -16,5 +16,7 @@ First public release. .. _substitutions: -.. |v124-flowchart| image:: docs/source/images/GraphkitFlowchart.svg +.. |v130-flowchart| image:: docs/source/images/GraphkitFlowchart-v1.3.0.svg + :alt: graphkit-v1.3.0 flowchart +.. |v124-flowchart| image:: docs/source/images/GraphkitFlowchart-v1.2.4.svg :alt: graphkit-v1.2.4 flowchart diff --git a/docs/GraphkitFlowchart.dot b/docs/GraphkitFlowchart.dot index 3f5d61f9..af1217f3 100644 --- a/docs/GraphkitFlowchart.dot +++ b/docs/GraphkitFlowchart.dot @@ -1,18 +1,31 @@ # Render it manually with this command, and remember to update result in git: # -# dot docs/GraphkitFlowchart.dot -Tsvg -odocs/source/images/GraphkitFlowchart.svg +# dot docs/GraphkitFlowchart.dot -Tsvg -odocs/source/images/GraphkitFlowchart-vX.Y.Z.svg # digraph { - label="graphkit-v1.2.4 flowchart"; + label="graphkit-v1.3.0 flowchart"; labelloc=t; operations [shape=parallelogram fontname="italic"]; - compose [label="compose & compile" fontname="italic"]; + compose [fontname="italic"]; network [shape=parallelogram fontname="italic"]; - data [shape=rect label="inputs & outputs"]; - compute [fontname=italic fontname="italic"]; + inputs [shape=rect label="input names"]; + outputs [shape=rect label="output names"]; + subgraph cluster_compute { + label=compute + fontname=bold + style=dashed; + labelloc=b; + + compile [fontname="italic"]; + plan [shape=parallelogram label="execution plan" fontname="italic"]; + execute [fontname=italic fontname="italic"]; + } + values [shape=rect label="input values"]; solution [shape=rect]; + overwrites [shape=rect]; operations -> compose -> network [arrowhead=vee]; - {data network} -> compute -> solution [arrowhead=vee]; + 
{network inputs outputs} -> compile -> plan [arrowhead=vee]; + {plan values} -> execute -> {solution overwrites} [arrowhead=vee]; } \ No newline at end of file diff --git a/docs/source/changes.rst b/docs/source/changes.rst index 96330d66..41fd88d3 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -1,6 +1,7 @@ .. include:: ../../CHANGES.rst :end-before: .. _substitutions: - -.. |v124-flowchart| image:: images/GraphkitFlowchart.svg +.. |v130-flowchart| image:: images/GraphkitFlowchart-v1.3.0.svg + :alt: graphkit-v1.3.0 flowchart +.. |v124-flowchart| image:: images/GraphkitFlowchart-v1.2.4.svg :alt: graphkit-v1.2.4 flowchart diff --git a/docs/source/images/GraphkitFlowchart.svg b/docs/source/images/GraphkitFlowchart-v1.2.4.svg similarity index 100% rename from docs/source/images/GraphkitFlowchart.svg rename to docs/source/images/GraphkitFlowchart-v1.2.4.svg diff --git a/docs/source/images/GraphkitFlowchart-v1.3.0.svg b/docs/source/images/GraphkitFlowchart-v1.3.0.svg new file mode 100644 index 00000000..ac88bfec --- /dev/null +++ b/docs/source/images/GraphkitFlowchart-v1.3.0.svg @@ -0,0 +1,145 @@ + + + + + + +%3 + +graphkit-v1.3.0 flowchart + +cluster_compute + +compute + + + +operations + +operations + + + +compose + +compose + + + +operations->compose + + + + + +network + +network + + + +compose->network + + + + + +compile + +compile + + + +network->compile + + + + + +inputs + +input names + + + +inputs->compile + + + + + +outputs + +output names + + + +outputs->compile + + + + + +plan + +execution plan + + + +compile->plan + + + + + +execute + +execute + + + +plan->execute + + + + + +solution + +solution + + + +execute->solution + + + + + +overwrites + +overwrites + + + +execute->overwrites + + + + + +values + +input values + + + +values->execute + + + + + From 554cd3e252ec4e321b113158fd53dff14280d9d7 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 18:59:24 +0300 Subject: [PATCH 111/167] enh(site): sphins-extlinks 
extension for GH-issue links --- CHANGES.rst | 2 +- docs/source/conf.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index afd0b0e1..fbcadb01 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,7 +7,7 @@ v1.2.4: Mar 2018 ================ First public release. -- Issues in pruning algorithm: #24, #25 +- Issues in pruning algorithm: :gh:`24`, :gh:`25` - Blocking bug in plotting code for Python-3.x. - Test-cases without assertions (just prints). diff --git a/docs/source/conf.py b/docs/source/conf.py index 54d2c6b8..859419e9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -35,9 +35,14 @@ extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.coverage', - 'sphinx.ext.imgmath' + 'sphinx.ext.imgmath', + 'sphinx.ext.extlinks', ] +extlinks = { + 'gh': ('https://github.com/yahoo/graphkit/issues/%s', '#'), +} + # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] From 20fd385c070fdd5c072aaf01e7ad17bd46ff1a07 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 21:16:06 +0300 Subject: [PATCH 112/167] chore: add myself to the authors --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 47a0fdfc..20c589df 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,8 @@ version=version, description='Lightweight computation graphs for Python', long_description=long_description, - author='Huy Nguyen, Arel Cordero, Pierre Garrigues, Rob Hess, Tobi Baumgartner, Clayton Mellina', + author='Huy Nguyen, Arel Cordero, Pierre Garrigues, Rob Hess, ' + 'Tobi Baumgartner, Clayton Mellina, ankostis@gmail.com', author_email='huyng@yahoo-inc.com', url='http://github.com/yahoo/graphkit', packages=['graphkit'], From 6aaa6da5a5b950bdc172c33bcbc9b77507cce56b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 21:24:48 +0300 Subject: [PATCH 113/167] DOC: update package coordinates in `setup.py` --- 
setup.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 20c589df..1cec5132 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,12 @@ 'Tobi Baumgartner, Clayton Mellina, ankostis@gmail.com', author_email='huyng@yahoo-inc.com', url='http://github.com/yahoo/graphkit', + project_urls={ + "Documentation": "https://pythonhosted.org/graphkit/", + "Release Notes": "https://pythonhosted.org/graphkit/changes.html", + "Sources": "https://github.com/yahoo/graphkit", + "Bug Tracker": "https://github.com/yahoo/graphkit/issues", + }, packages=['graphkit'], install_requires=[ "networkx; python_version >= '3.5'", @@ -46,7 +52,9 @@ }, tests_require=test_reqs, license='Apache-2.0', - keywords=['graph', 'computation graph', 'DAG', 'directed acyclical graph'], + keywords=[ + 'graph', 'computation graph', 'DAG', 'directed acyclical graph', + 'executor', 'scheduler', 'etl', 'workflow', 'pipeline'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: Apache Software License', @@ -61,8 +69,11 @@ 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Scientific/Engineering', 'Topic :: Software Development' ], + zip_safe=True, platforms='Windows,Linux,Solaris,Mac OS-X,Unix' ) From b97563e3d7d0d533f071da0776183d9496205998 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 21:37:11 +0300 Subject: [PATCH 114/167] DOC(BADGES): more of them --- README.md | 5 ++- docs/source/index.rst | 83 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 75 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 22d933a7..b1db0c46 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # GraphKit -[![PyPI version](https://badge.fury.io/py/graphkit.svg)](https://badge.fury.io/py/graphkit) [![Build 
Status](https://travis-ci.org/yahoo/graphkit.svg?branch=master)](https://travis-ci.org/yahoo/graphkit) [![codecov](https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg)](https://codecov.io/gh/yahoo/graphkit) +[![Latest version in PyPI](https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version)](https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version) [![Latest version in GitHub](https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases)](https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases) [![Supported Python versions of latest release in PyPi](https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python)](https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python) [![Build Status](https://travis-ci.org/yahoo/graphkit.svg?branch=master)](https://travis-ci.org/yahoo/graphkit) [![codecov](https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg)](https://codecov.io/gh/yahoo/graphkit) [![License](https://img.shields.io/pypi/l/graphkit.svg)](https://img.shields.io/pypi/l/graphkit.svg) + +[![Github watchers](https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social) [![Github stargazers](https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social) [![Github forks](https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social) [![Issues count](http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social)](http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social) [Full Documentation](https://pythonhosted.org/graphkit/) @@ -78,3 +80,4 @@ of the last computation it using these methods:: # License Code licensed under the Apache License, Version 2.0 license. 
See LICENSE file for terms. + diff --git a/docs/source/index.rst b/docs/source/index.rst index 949c3cc5..3b8d54e5 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,14 +8,20 @@ GraphKit ======== -.. image:: https://badge.fury.io/py/graphkit.svg - :target: https://badge.fury.io/py/graphkit -.. image:: https://travis-ci.org/yahoo/graphkit.svg?branch=master - :target: https://travis-ci.org/yahoo/graphkit -.. image:: https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg - :target: https://codecov.io/gh/yahoo/graphkit +|travis-status| |cover-status| |gh-version| |pypi-version| |python-ver| +|dev-status| |downloads-count| |codestyle| |proj-lic| -**It's a DAG all the way down** +|gh-watch| |gh-star| |gh-fork| |gh-issues| + +**It's a DAG all the way down!** |sample-plot| + +Lightweight computation graphs for Python +----------------------------------------- + +GraphKit is a lightweight Python module for creating and running ordered graphs of computations, +where the nodes of the graph correspond to computational operations, and the edges +correspond to output --> input dependencies between those operations. +Such graphs are useful in computer vision, machine learning, and many other domains. .. toctree:: :maxdepth: 2 @@ -25,11 +31,6 @@ GraphKit changes -Lightweight computation graphs for Python ------------------------------------------ - -GraphKit is a lightweight Python module for creating and running ordered graphs of computations, where the nodes of the graph correspond to computational operations, and the edges correspond to output --> input dependencies between those operations. Such graphs are useful in computer vision, machine learning, and many other domains. - .. _quick-start: Quick start @@ -113,3 +114,61 @@ License ------- Code licensed under the Apache License, Version 2.0 license. See LICENSE file for terms. + + +.. |travis-status| image:: https://travis-ci.org/yahoo/graphkit.svg + :alt: Travis continuous integration testing ok? 
(Linux) + :scale: 100% + :target: https://travis-ci.org/yahoo/graphkit/builds + +.. |cover-status| image:: https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg + :target: https://codecov.io/gh/yahoo/graphkit + +.. |gh-version| image:: https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases + :target: https://github.com/yahoo/graphkit/releases + :alt: Latest version in GitHub + +.. |pypi-version| image:: https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version + :target: https://pypi.python.org/pypi/graphkit/ + :alt: Latest version in PyPI + +.. |python-ver| image:: https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python + :target: https://pypi.python.org/pypi/graphkit/ + :alt: Supported Python versions of latest release in PyPi + +.. |dev-status| image:: https://pypip.in/status/graphkit/badge.svg + :target: https://pypi.python.org/pypi/graphkit/ + :alt: Development Status + +.. |downloads-count| image:: https://pypip.in/download/graphkit/badge.svg?period=month&label=PyPi%20downloads + :target: https://pypi.python.org/pypi/graphkit/ + :alt: PyPi downloads + +.. |codestyle| image:: https://img.shields.io/badge/code%20style-black-black.svg + :target: https://github.com/ambv/black + :alt: Code Style + +.. |gh-watch| image:: https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social + :target: https://github.com/yahoo/graphkit + :alt: Github watchers + +.. |gh-star| image:: https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social + :target: https://github.com/yahoo/graphkit + :alt: Github stargazers + +.. |gh-fork| image:: https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social + :target: https://github.com/yahoo/graphkit + :alt: Github forks + +.. |gh-issues| image:: http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social + :target: https://github.com/yahoo/graphkit/issues + :alt: Issues count + +.. 
|proj-lic| image:: https://img.shields.io/pypi/l/graphkit.svg + :target: https://www.apache.org/licenses/LICENSE-2.0 + :alt: Apache License, version 2.0 + +.. |sample-plot| image:: images/sample_plot.svg + :alt: sample graphkit plot + :width: 120px + :align: middle From ababd260838cf32734ab3285f4f6171315ee9653 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 19:00:56 +0300 Subject: [PATCH 115/167] DOC(changes): v1.2.4 --- CHANGES.rst | 128 +++++++++++++++++++++++++++-- docs/source/changes.rst | 6 ++ docs/source/images/sample_plot.svg | 99 ++++++++++++++++++++++ 3 files changed, 226 insertions(+), 7 deletions(-) create mode 100644 docs/source/images/sample_plot.svg diff --git a/CHANGES.rst b/CHANGES.rst index fbcadb01..04ec7d0a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,11 +1,119 @@ - ================ - Graphkit Changes - ================ +################## +Graphkit Changelog +################## +v1.3.0 (Oct 2019): New DAG solver, better plotting & "sideffect" +================================================================ -v1.2.4: Mar 2018 -================ -First public release. +Kept external API (hopefully) the same, but revamped pruning algorithm and +refactored network compute/compile structure, so results may change; significantly +enhanced plotting. The only new feature actually is the :class:`sideffect`` modifier. + +Network: +-------- + +- FIX(:gh:`18`, :gh:`26`, :gh:`29`, :gh:`17`, :gh:`20`): Revamped DAG SOLVER + to fix bad pruning described in :gh:`24` & :gh:`25` + + Pruning now works by breaking incoming provide-links to any given + intermedediate inputs dropping operations with partial inputs or without outputs. + + The end result is that operations in the graph that do not have all inputs satisfied, + they are skipped (in v1.2.4 they crashed). + + Also started annotating edges with optionals, to make proper use of the underlying + ``networkx`` graph. 
+ + |v130-flowchart| + +- REFACT(:gh:`21`, :gh:`29`): Refactored Network and introduced :class:`ExecutionPlan` to keep + compilation results (the old ``steps`` list, plus input/output names). + + Moved also the check for when to evict a value, from running the execution-plan, + to whenbuilding it; thus, execute methods don't need outputs anymore. + +- ENH(:gh:`26`): "Pin* input values that may be overriten by calculated ones. + + This required the introduction of the new :class:`PinInstruction` in + the execution plan. + +- FIX(:gh:`23`, :gh:`22`-2.4.3): Keep consistent order of ``networkx.DiGraph`` + and *sets*, to generate deterministic solutions. + + *Unfortunately*, it non-determinism has not been fixed in < PY3.5, just + reduced the frequency of `spurious failures + `_, caused by + unstable dicts, and the use of subgraphs. + +- enh: Mark outputs produced by :class:`NetworkOperation`'s needs as ``optional``. + TODO: subgraph network-operations would not be fully functional until + *"optional outpus"* are dealt with (see :gh:`22`-2.5). + +- enh: Annotate operation exceptions with ``ExecutionPlan`` to aid debug sessions, + +- drop: methods ``list_layers()``/``show layers()`` not needed, ``repr()`` is + a better replacement. + + +Plotting: +--------- + +- ENH(:gh:`13`, :gh:`26`, :gh:`29`): Now network remembers last plan and uses that + to overlay graphs with the internals of the planing and execution: |sample-plot| + + + - execution-steps & order + - delete & pin instructions + - given inputs & asked outputs + - solution values (just if they are present) + - "optional" needs & broken links during pruning + +- REFACT: Move all API doc on plotting in a single module, splitted in 2 phases, + build DOT & render DOT + +- FIX(:gh:`13`): bring plot writing into files up-to-date from PY2; do not create plot-file + if given file-extension is not supported. + +- FEAT: path `pydot library `_ to support rendering + in *Jupyter notebooks*. 
+ + + +Testing & other code: +--------------------- + + - Increased coverage from 77% --> 90%. + +- ENH(:gh:`28`): use ``pytest``, to facilitate TCs parametrization. + +- ENH(:gh:`30`): Doctest all code; enabled many assertions that were just print-outs + in v1.2.4. + +- FIX: ``operation.__repr__()`` was crashing when not all arguments + had been set - a condition frequtnly met during debugging session or failed + TCs (inspired by @syamajala's 309338340). + +- enh: Sped up parallel/multihtread TCs by reducing delays & repetitions. + + .. tip:: + You need ``pytest -m slow`` to run those slow tests. + + + +Chore & Docs: +------------- + +- FEAT: add changelog in ``CHANGES.rst`` file, containing flowcharts + to compare versions ``v1.2.4 <--> v1.3..0``. +- enh: updated site & documentation for all new features, comparing with v1.2.4. +- enh(:gh:`30`): added "API reference' chapter. +- drop(build): ``sphinx_rtd_theme`` library is the default theme for Sphinx now. +- enh(build): Add ``test`` *pip extras*. + + + +v1.2.4 (Mar 2018) +================= - Issues in pruning algorithm: :gh:`24`, :gh:`25` - Blocking bug in plotting code for Python-3.x. @@ -13,10 +121,16 @@ First public release. |v124-flowchart| - .. _substitutions: + +.. |sample-plot| image:: docs/source/images/sample_plot.svg + :alt: sample graphkit plot + :width: 120px + :align: bottom .. |v130-flowchart| image:: docs/source/images/GraphkitFlowchart-v1.3.0.svg :alt: graphkit-v1.3.0 flowchart + :scale: 75% .. |v124-flowchart| image:: docs/source/images/GraphkitFlowchart-v1.2.4.svg :alt: graphkit-v1.2.4 flowchart + :scale: 75% diff --git a/docs/source/changes.rst b/docs/source/changes.rst index 41fd88d3..8dbd02c5 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -1,7 +1,13 @@ .. include:: ../../CHANGES.rst :end-before: .. _substitutions: +.. |sample-plot| image:: images/sample_plot.svg + :alt: sample graphkit plot + :width: 120px + :align: bottom .. 
|v130-flowchart| image:: images/GraphkitFlowchart-v1.3.0.svg :alt: graphkit-v1.3.0 flowchart + :width: 75% .. |v124-flowchart| image:: images/GraphkitFlowchart-v1.2.4.svg :alt: graphkit-v1.2.4 flowchart + :scale: 75% diff --git a/docs/source/images/sample_plot.svg b/docs/source/images/sample_plot.svg new file mode 100644 index 00000000..635e5e7c --- /dev/null +++ b/docs/source/images/sample_plot.svg @@ -0,0 +1,99 @@ + + + + + + +G + + + +a + +a + + + +must run + +must run + + + +a->must run + + + + + +overriden + +overriden + + + +must run->overriden + + + + + +must run->overriden + + +1 + + + +calced + +calced + + + +must run->calced + + + + + +add + +add + + + +overriden->add + + + + + +overriden->add + + +2 + + + +calced->add + + + + + +asked + +asked + + + +add->asked + + + + + From 18bdb88a0df603ed404e8e4c06b6a35642315ccb Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 22:06:54 +0300 Subject: [PATCH 116/167] doc(changes): mark dates of all old pypi-releases. --- CHANGES.rst | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 04ec7d0a..102ae8b2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -112,8 +112,8 @@ Chore & Docs: -v1.2.4 (Mar 2018) -================= +v1.2.4 (Mar 7, 2018) +==================== - Issues in pruning algorithm: :gh:`24`, :gh:`25` - Blocking bug in plotting code for Python-3.x. @@ -121,6 +121,42 @@ v1.2.4 (Mar 2018) |v124-flowchart| + + +1.2.2 (Mar 7, 2018) +=================== + + +1.2.1 (Feb 23, 2018) +==================== + + +1.2.0 (Feb 13, 2018) +==================== + + +1.1.0 (Nov 9, 2017) +=================== + + +1.0.4 (Nov 3, 2017) +=================== + + +1.0.3 (Jan 31, 2017) +==================== + + +1.0.2 (Sep 29, 2016) +==================== + +1.0.1 (Aug 24, 2016) +==================== + +1.0 (Aug 2, 2016) +================= +First public release in PyPi. + .. 
_substitutions: From 510a9cd2d0e2a94e7f567523ca6b9d61914ceb76 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 22:13:08 +0300 Subject: [PATCH 117/167] chore(ver): bump version 1.2.4-->1.3.0 --- graphkit/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/__init__.py b/graphkit/__init__.py index b930a65c..56c9f5ce 100644 --- a/graphkit/__init__.py +++ b/graphkit/__init__.py @@ -2,7 +2,7 @@ # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. __author__ = 'hnguyen' -__version__ = '1.2.4' +__version__ = '1.3.0' from .functional import operation, compose From 7bd3f856e4313e1205d0deb0a6c8d05ea339d8f6 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 22:13:38 +0300 Subject: [PATCH 118/167] minor leftovers (refact & docs) --- graphkit/network.py | 7 +++++-- graphkit/plot.py | 11 ++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index e4957d59..f81632e9 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -293,6 +293,8 @@ def _collect_unsatisfied_operations(self, dag, inputs): # Prune operations that ended up providing no output. unsatisfied.append(node) else: + # It's ok not to dig into edge-data("optional") here, + # we care about all needs, including broken ones. real_needs = set(n for n in node.needs if not isinstance(n, optional)) if real_needs.issubset(op_satisfaction[node]): @@ -334,7 +336,7 @@ def _prune_graph(self, outputs, inputs): dag = self.graph # Ignore input names that aren't in the graph. - graph_inputs = iset(dag.nodes) & inputs # preserve order + graph_inputs = set(dag.nodes) & set(inputs) # unordered, iterated, but ok # Scream if some requested outputs aren't in the graph. unknown_outputs = iset(outputs) - dag.nodes @@ -373,7 +375,7 @@ def _prune_graph(self, outputs, inputs): # Clone it so that it is picklable. 
pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied).copy() - return pruned_dag, tuple(broken_edges) + return pruned_dag, broken_edges def compile(self, inputs=(), outputs=()): """ @@ -644,6 +646,7 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, if len(upnext) == 0: break + ## TODO: accept pool from caller done_iterator = pool.imap_unordered( (lambda op: (op, self._call_operation(op, solution))), upnext) diff --git a/graphkit/plot.py b/graphkit/plot.py index 19b1cdeb..b41ea99f 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -251,7 +251,7 @@ def get_node_name(a): # kw["tooltip"] = str(solution.get(nx_node)) # not working :-() node = pydot.Node(name=nx_node, shape=shape, **kw) else: # Operation - kw = {} + kw = {"fontname": "italic"} if steps and nx_node in steps: kw["penwdth"] = steps_thickness @@ -337,6 +337,7 @@ def render_pydot(dot, filename=None, show=False): See :meth:`Plotter.plot()` for sample code. """ + # TODO: research https://plot.ly/~empet/14007.embed # Save plot # if filename: @@ -381,10 +382,10 @@ def legend(filename=None, show=None): subgraph cluster_legend { label="Graphkit Legend"; - operation [shape=oval]; - graphop [shape=egg label="graph operation"]; - insteps [penwidth=3 label="execution step"]; - executed [style=filled fillcolor=wheat]; + operation [shape=oval fontname=italic]; + graphop [shape=egg label="graph operation" fontname=italic]; + insteps [penwidth=3 label="execution step" fontname=italic]; + executed [style=filled fillcolor=wheat fontname=italic]; operation -> graphop -> insteps -> executed [style=invis]; data [shape=rect]; From df6c3ee30b40ba76325c2d71319ea38eadc7583b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 22:16:08 +0300 Subject: [PATCH 119/167] STYLE: BLACKen code format --- graphkit/__init__.py | 4 +- graphkit/base.py | 55 ++--- graphkit/functional.py | 50 +++-- graphkit/network.py | 86 ++++---- setup.py | 116 +++++----- test/test_functional.py | 6 
+- test/test_graphkit.py | 478 ++++++++++++++++++++++------------------ 7 files changed, 425 insertions(+), 370 deletions(-) diff --git a/graphkit/__init__.py b/graphkit/__init__.py index 56c9f5ce..8ceca9be 100644 --- a/graphkit/__init__.py +++ b/graphkit/__init__.py @@ -1,8 +1,8 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. -__author__ = 'hnguyen' -__version__ = '1.3.0' +__author__ = "hnguyen" +__version__ = "1.3.0" from .functional import operation, compose diff --git a/graphkit/base.py b/graphkit/base.py index 89d759f9..116d30a5 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -17,6 +17,7 @@ class Data(object): This class an "abstract" class that should be extended by any class working with data in the HiC framework. """ + def __init__(self, **kwargs): pass @@ -26,6 +27,7 @@ def get_data(self): def set_data(self, data): raise NotImplementedError + class Operation(object): """ This is an abstract class representing a data transformation. To use this, @@ -62,10 +64,10 @@ def __init__(self, **kwargs): """ # (Optional) names for this layer, and the data it needs and provides - self.name = kwargs.get('name') - self.needs = kwargs.get('needs') - self.provides = kwargs.get('provides') - self.params = kwargs.get('params', {}) + self.name = kwargs.get("name") + self.needs = kwargs.get("needs") + self.provides = kwargs.get("provides") + self.params = kwargs.get("params", {}) # call _after_init as final step of initialization self._after_init() @@ -75,8 +77,7 @@ def __eq__(self, other): Operation equality is based on name of layer. 
(__eq__ and __hash__ must be overridden together) """ - return bool(self.name is not None and - self.name == getattr(other, 'name', None)) + return bool(self.name is not None and self.name == getattr(other, "name", None)) def __hash__(self): """ @@ -133,11 +134,11 @@ def __getstate__(self): result = {} # this check should get deprecated soon. its for downward compatibility # with earlier pickled operation objects - if hasattr(self, 'params'): - result["params"] = self.__dict__['params'] - result["needs"] = self.__dict__['needs'] - result["provides"] = self.__dict__['provides'] - result["name"] = self.__dict__['name'] + if hasattr(self, "params"): + result["params"] = self.__dict__["params"] + result["needs"] = self.__dict__["needs"] + result["provides"] = self.__dict__["provides"] + result["name"] = self.__dict__["name"] return result @@ -153,16 +154,17 @@ def __repr__(self): """ Display more informative names for the Operation class """ - return u"%s(name='%s', needs=%s, provides=%s)" % \ - (self.__class__.__name__, - getattr(self, "name", None), - getattr(self, "needs", None), - getattr(self, "provides", None)) + return u"%s(name='%s', needs=%s, provides=%s)" % ( + self.__class__.__name__, + getattr(self, "name", None), + getattr(self, "needs", None), + getattr(self, "provides", None), + ) class NetworkOperation(Operation, plot.Plotter): def __init__(self, **kwargs): - self.net = kwargs.pop('net') + self.net = kwargs.pop("net") Operation.__init__(self, **kwargs) # set execution mode to single-threaded sequential by default @@ -177,8 +179,11 @@ def _build_pydot(self, **kws): def _compute(self, named_inputs, outputs=None): return self.net.compute( - named_inputs, outputs, method=self._execution_method, - overwrites_collector=self._overwrites_collector) + named_inputs, + outputs, + method=self._execution_method, + overwrites_collector=self._overwrites_collector, + ) def __call__(self, *args, **kwargs): return self._compute(*args, **kwargs) @@ -194,11 +199,11 @@ def 
set_execution_method(self, method): If "parallel", execute graph operations concurrently using a threadpool. """ - choices = ['parallel', 'sequential'] + choices = ["parallel", "sequential"] if method not in choices: raise ValueError( - "Invalid computation method %r! Must be one of %s" - (method, choices)) + "Invalid computation method %r! Must be one of %s"(method, choices) + ) self._execution_method = method def set_overwrites_collector(self, collector): @@ -215,11 +220,11 @@ def set_overwrites_collector(self, collector): """ if collector is not None and not isinstance(collector, abc.MutableMapping): raise ValueError( - "Overwrites collector was not a MutableMapping, but: %r" - % collector) + "Overwrites collector was not a MutableMapping, but: %r" % collector + ) self._overwrites_collector = collector def __getstate__(self): state = Operation.__getstate__(self) - state['net'] = self.__dict__['net'] + state["net"] = self.__dict__["net"] return state diff --git a/graphkit/functional.py b/graphkit/functional.py index 9c0348c7..2fb0d29d 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -10,7 +10,7 @@ class FunctionalOperation(Operation): def __init__(self, **kwargs): - self.fn = kwargs.pop('fn') + self.fn = kwargs.pop("fn") Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): @@ -19,7 +19,7 @@ def _compute(self, named_inputs, outputs=None): inputs = [ named_inputs[n] for n in self.needs - if 'optional' not in self.net.graph.get_edge_data(n, self) + if "optional" not in self.net.graph.get_edge_data(n, self) and not isinstance(n, sideffect) ] @@ -28,8 +28,7 @@ def _compute(self, named_inputs, outputs=None): optionals = { n: named_inputs[n] for n in self.needs - if 'optional' in self.net.graph.get_edge_data(n, self) - and n in named_inputs + if "optional" in self.net.graph.get_edge_data(n, self) and n in named_inputs } # Combine params and optionals into one big glob of keyword arguments. 
@@ -58,7 +57,7 @@ def __call__(self, *args, **kwargs): def __getstate__(self): state = Operation.__getstate__(self) - state['fn'] = self.__dict__['fn'] + state["fn"] = self.__dict__["fn"] return state @@ -96,24 +95,26 @@ def __init__(self, fn=None, **kwargs): def _normalize_kwargs(self, kwargs): # Allow single value for needs parameter - needs = kwargs['needs'] + needs = kwargs["needs"] if isinstance(needs, str) and not isinstance(needs, optional): assert needs, "empty string provided for `needs` parameters" - kwargs['needs'] = [needs] + kwargs["needs"] = [needs] # Allow single value for provides parameter - provides = kwargs.get('provides') + provides = kwargs.get("provides") if isinstance(provides, str): assert provides, "empty string provided for `needs` parameters" - kwargs['provides'] = [provides] + kwargs["provides"] = [provides] - assert kwargs['name'], "operation needs a name" - assert isinstance(kwargs['needs'], list), "no `needs` parameter provided" - assert isinstance(kwargs['provides'], list), "no `provides` parameter provided" - assert hasattr(kwargs['fn'], '__call__'), "operation was not provided with a callable" + assert kwargs["name"], "operation needs a name" + assert isinstance(kwargs["needs"], list), "no `needs` parameter provided" + assert isinstance(kwargs["provides"], list), "no `provides` parameter provided" + assert hasattr( + kwargs["fn"], "__call__" + ), "operation was not provided with a callable" - if type(kwargs['params']) is not dict: - kwargs['params'] = {} + if type(kwargs["params"]) is not dict: + kwargs["params"] = {} return kwargs @@ -156,14 +157,13 @@ def __repr__(self): """ func_name = getattr(self, "fn") func_name = func_name and getattr(func_name, "__name__", None) - return u"%s(name='%s', needs=%s, provides=%s, fn=%s)" % \ - (self.__class__.__name__, - getattr(self, "name", None), - getattr(self, "needs", None), - getattr(self, "provides", None), - func_name) - - + return u"%s(name='%s', needs=%s, provides=%s, fn=%s)" % ( + 
self.__class__.__name__, + getattr(self, "name", None), + getattr(self, "needs", None), + getattr(self, "provides", None), + func_name, + ) class compose(object): @@ -228,4 +228,6 @@ def __call__(self, *operations): for op in operations: net.add_op(op) - return NetworkOperation(name=self.name, needs=needs, provides=provides, params={}, net=net) + return NetworkOperation( + name=self.name, needs=needs, provides=provides, params={}, net=net + ) diff --git a/graphkit/network.py b/graphkit/network.py index f81632e9..3ca6d0fb 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -83,6 +83,7 @@ from networkx import DiGraph + if sys.version_info < (3, 6): """ Consistently ordered variant of :class:`~networkx.DiGraph`. @@ -101,6 +102,7 @@ class DataPlaceholderNode(str): """ Dag node naming a data-value produced or required by an operation. """ + def __repr__(self): return 'DataPlaceholderNode("%s")' % self @@ -113,6 +115,7 @@ class DeleteInstruction(str): frees its data-value from `solution` after it is no longer needed, to reduce memory footprint while computing the graph. """ + def __repr__(self): return 'DeleteInstruction("%s")' % self @@ -129,6 +132,7 @@ class PinInstruction(str): its providing function(s) could not be pruned, because their other outputs are needed elesewhere. """ + def __repr__(self): return 'PinInstruction("%s")' % self @@ -157,7 +161,7 @@ def __init__(self, **kwargs): def _build_pydot(self, **kws): from .plot import build_pydot - kws.setdefault('graph', self.graph) + kws.setdefault("graph", self.graph) return build_pydot(**kws) @@ -195,7 +199,6 @@ def add_op(self, operation): for p in operation.provides: self.graph.add_edge(operation, DataPlaceholderNode(p)) - def _build_execution_steps(self, dag, inputs, outputs): """ Create the list of operation-nodes & *instructions* evaluating all @@ -244,7 +247,7 @@ def _build_execution_steps(self, dag, inputs, outputs): # is no longer needed by future Operations. 
for need in self.graph.pred[node]: log.debug("checking if node %s can be deleted", need) - for future_node in ordered_nodes[i+1:]: + for future_node in ordered_nodes[i + 1 :]: if ( isinstance(future_node, Operation) and need in future_node.needs @@ -295,8 +298,9 @@ def _collect_unsatisfied_operations(self, dag, inputs): else: # It's ok not to dig into edge-data("optional") here, # we care about all needs, including broken ones. - real_needs = set(n for n in node.needs - if not isinstance(n, optional)) + real_needs = set( + n for n in node.needs if not isinstance(n, optional) + ) if real_needs.issubset(op_satisfaction[node]): # We have a satisfied operation; mark its output-data # as ok. @@ -304,8 +308,8 @@ def _collect_unsatisfied_operations(self, dag, inputs): else: # Prune operations with partial inputs. unsatisfied.append(node) - elif isinstance(node, (DataPlaceholderNode, str)): # `str` are givens - if node in ok_data: + elif isinstance(node, (DataPlaceholderNode, str)): # `str` are givens + if node in ok_data: # mark satisfied-needs on all future operations for future_op in dag.adj[node]: op_satisfaction[future_op].add(node) @@ -314,7 +318,6 @@ def _collect_unsatisfied_operations(self, dag, inputs): return unsatisfied - def _prune_graph(self, outputs, inputs): """ Determines what graph steps need to run to get to the requested @@ -342,8 +345,8 @@ def _prune_graph(self, outputs, inputs): unknown_outputs = iset(outputs) - dag.nodes if unknown_outputs: raise ValueError( - "Unknown output node(s) requested: %s" - % ", ".join(unknown_outputs)) + "Unknown output node(s) requested: %s" % ", ".join(unknown_outputs) + ) broken_dag = dag.copy() # preserve net's graph @@ -369,7 +372,6 @@ def _prune_graph(self, outputs, inputs): ending_in_outputs.update(nx.ancestors(dag, input_name)) broken_dag = broken_dag.subgraph(ending_in_outputs | set(outputs)) - # Prune unsatisfied operations (those with partial inputs or no outputs). 
unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) # Clone it so that it is picklable. @@ -399,7 +401,7 @@ def compile(self, inputs=(), outputs=()): if not outputs: outputs = () elif isinstance(outputs, str): - outputs = (outputs, ) + outputs = (outputs,) # Make a stable cache-key cache_key = (tuple(sorted(inputs)), tuple(sorted(outputs))) @@ -428,8 +430,7 @@ def compile(self, inputs=(), outputs=()): return plan - def compute( - self, named_inputs, outputs, method=None, overwrites_collector=None): + def compute(self, named_inputs, outputs, method=None, overwrites_collector=None): """ Solve & execute the graph, sequentially or parallel. @@ -455,8 +456,9 @@ def compute( :returns: a dictionary of output data objects, keyed by name. """ - assert isinstance(outputs, (list, tuple)) or outputs is None,\ - "The outputs argument must be a list" + assert ( + isinstance(outputs, (list, tuple)) or outputs is None + ), "The outputs argument must be a list" # Build the execution plan. self.last_plan = plan = self.compile(named_inputs.keys(), outputs) @@ -478,7 +480,7 @@ def compute( class ExecutionPlan( namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps executed"), - plot.Plotter + plot.Plotter, ): """ The result of the network's compilation phase. 
@@ -525,7 +527,9 @@ def _build_pydot(self, **kws): "inputs": self.inputs, "outputs": self.outputs, "executed": self.executed, - "edge_props": {e: {"color": "wheat", "penwidth": 2} for e in self.broken_edges}, + "edge_props": { + e: {"color": "wheat", "penwidth": 2} for e in self.broken_edges + }, "clusters": clusters, } mykws.update(kws) @@ -534,9 +538,11 @@ def _build_pydot(self, **kws): def __repr__(self): steps = ["\n +--%s" % s for s in self.steps] - return ( - "ExecutionPlan(inputs=%s, outputs=%s, steps:%s)" - % (self.inputs, self.outputs, ''.join(steps))) + return "ExecutionPlan(inputs=%s, outputs=%s, steps:%s)" % ( + self.inputs, + self.outputs, + "".join(steps), + ) def get_data_node(self, name): """ @@ -560,8 +566,9 @@ def _can_schedule_operation(self, op): """ # Use `broken_dag` to allow executing operations after given inputs # regardless of whether their producers have yet to run. - dependencies = set(n for n in nx.ancestors(self.broken_dag, op) - if isinstance(n, Operation)) + dependencies = set( + n for n in nx.ancestors(self.broken_dag, op) if isinstance(n, Operation) + ) return dependencies.issubset(self.executed) def _can_evict_value(self, name): @@ -576,8 +583,9 @@ def _can_evict_value(self, name): data_node = self.get_data_node(name) # Use `broken_dag` not to block a successor waiting for this data, # since in any case will use a given input, not some pipe of this data. 
- return data_node and set( - self.broken_dag.successors(data_node)).issubset(self.executed) + return data_node and set(self.broken_dag.successors(data_node)).issubset( + self.executed + ) def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): value_name = str(value_name) @@ -593,8 +601,8 @@ def _call_operation(self, op, solution): ex.execution_plan = self raise - def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, - thread_pool_size=10 + def _execute_thread_pool_barrier_method( + self, inputs, solution, overwrites, thread_pool_size=10 ): """ This method runs the graph using a parallel pool of thread executors. @@ -627,10 +635,7 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, # Only delete if all successors for the data node # have been executed. # An optional need may not have a value in the solution. - if ( - node in solution - and self._can_evict_value(node) - ): + if node in solution and self._can_evict_value(node): log.debug("removing data '%s' from solution.", node) del solution[node] elif isinstance(node, PinInstruction): @@ -638,9 +643,7 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, # providers of the data have executed. # An optional need may not have a value in the solution. 
if node in solution: - self._pin_data_in_solution( - node, solution, inputs, overwrites) - + self._pin_data_in_solution(node, solution, inputs, overwrites) # stop if no nodes left to schedule, exit out of the loop if len(upnext) == 0: @@ -648,13 +651,13 @@ def _execute_thread_pool_barrier_method(self, inputs, solution, overwrites, ## TODO: accept pool from caller done_iterator = pool.imap_unordered( - (lambda op: (op, self._call_operation(op, solution))), upnext) + (lambda op: (op, self._call_operation(op, solution))), upnext + ) for op, result in done_iterator: solution.update(result) self.executed.add(op) - def _execute_sequential_method(self, inputs, solution, overwrites): """ This method runs the graph one operation at a time in a single thread @@ -664,7 +667,7 @@ def _execute_sequential_method(self, inputs, solution, overwrites): if isinstance(step, Operation): - log.debug("%sexecuting step: %s", "-"*32, step.name) + log.debug("%sexecuting step: %s", "-" * 32, step.name) # time execution... t0 = time.time() @@ -708,9 +711,11 @@ def execute(self, solution, overwrites=None, method=None): self.executed.clear() # choose a method of execution - executor = (self._execute_thread_pool_barrier_method - if method == "parallel" else - self._execute_sequential_method) + executor = ( + self._execute_thread_pool_barrier_method + if method == "parallel" + else self._execute_sequential_method + ) # clone and keep orignal inputs in solution intact executor(dict(solution), solution, overwrites) @@ -718,6 +723,7 @@ def execute(self, solution, overwrites=None, method=None): # return it, but caller can also see the results in `solution` dict. 
return solution + # TODO: maybe class Solution(object): # values = {} # overwrites = None diff --git a/setup.py b/setup.py index 1cec5132..e9cb145c 100644 --- a/setup.py +++ b/setup.py @@ -12,68 +12,66 @@ # Grab the version using convention described by flask # https://github.com/pallets/flask/blob/master/setup.py#L10 -with io.open('graphkit/__init__.py', 'rt', encoding='utf8') as f: - version = re.search(r'__version__ = \'(.*?)\'', f.read()).group(1) +with io.open("graphkit/__init__.py", "rt", encoding="utf8") as f: + version = re.search(r'__version__ = "(.*?)"', f.read()).group(1) -plot_reqs = [ - "matplotlib", # to test plot - "pydot", # to test plot -] -test_reqs = plot_reqs + [ - "pytest", - "pytest-cov", - "pytest-sphinx", -] +plot_reqs = ["matplotlib", "pydot"] # to test plot # to test plot +test_reqs = plot_reqs + ["pytest", "pytest-cov", "pytest-sphinx"] setup( - name='graphkit', - version=version, - description='Lightweight computation graphs for Python', - long_description=long_description, - author='Huy Nguyen, Arel Cordero, Pierre Garrigues, Rob Hess, ' - 'Tobi Baumgartner, Clayton Mellina, ankostis@gmail.com', - author_email='huyng@yahoo-inc.com', - url='http://github.com/yahoo/graphkit', - project_urls={ - "Documentation": "https://pythonhosted.org/graphkit/", - "Release Notes": "https://pythonhosted.org/graphkit/changes.html", - "Sources": "https://github.com/yahoo/graphkit", - "Bug Tracker": "https://github.com/yahoo/graphkit/issues", - }, - packages=['graphkit'], - install_requires=[ - "networkx; python_version >= '3.5'", - "networkx == 2.2; python_version < '3.5'", - "boltons" # for IndexSet - ], - extras_require={ - 'plot': plot_reqs, - 'test': test_reqs, - }, - tests_require=test_reqs, - license='Apache-2.0', - keywords=[ - 'graph', 'computation graph', 'DAG', 'directed acyclical graph', - 'executor', 'scheduler', 'etl', 'workflow', 'pipeline'], - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: 
Apache Software License', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'Natural Language :: English', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: Microsoft :: Windows', - 'Operating System :: POSIX', - 'Operating System :: POSIX', - 'Operating System :: Unix', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Topic :: Scientific/Engineering', - 'Topic :: Software Development' + name="graphkit", + version=version, + description="Lightweight computation graphs for Python", + long_description=long_description, + author="Huy Nguyen, Arel Cordero, Pierre Garrigues, Rob Hess, " + "Tobi Baumgartner, Clayton Mellina, ankostis@gmail.com", + author_email="huyng@yahoo-inc.com", + url="http://github.com/yahoo/graphkit", + project_urls={ + "Documentation": "https://pythonhosted.org/graphkit/", + "Release Notes": "https://pythonhosted.org/graphkit/changes.html", + "Sources": "https://github.com/yahoo/graphkit", + "Bug Tracker": "https://github.com/yahoo/graphkit/issues", + }, + packages=["graphkit"], + install_requires=[ + "networkx; python_version >= '3.5'", + "networkx == 2.2; python_version < '3.5'", + "boltons", # for IndexSet + ], + extras_require={"plot": plot_reqs, "test": test_reqs}, + tests_require=test_reqs, + license="Apache-2.0", + keywords=[ + "graph", + "computation graph", + "DAG", + "directed acyclical graph", + "executor", + "scheduler", + "etl", + "workflow", + "pipeline", + ], + classifiers=[ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: Apache Software License", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Natural Language :: English", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX", + "Operating 
System :: POSIX", + "Operating System :: Unix", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Topic :: Scientific/Engineering", + "Topic :: Software Development", ], zip_safe=True, - platforms='Windows,Linux,Solaris,Mac OS-X,Unix' + platforms="Windows,Linux,Solaris,Mac OS-X,Unix", ) diff --git a/test/test_functional.py b/test/test_functional.py index c81dc44b..13cd1270 100644 --- a/test/test_functional.py +++ b/test/test_functional.py @@ -6,17 +6,17 @@ from graphkit import Operation, operation -@pytest.fixture(params=[None, ['some']]) +@pytest.fixture(params=[None, ["some"]]) def opname(request): return request.param -@pytest.fixture(params=[None, ['some']]) +@pytest.fixture(params=[None, ["some"]]) def opneeds(request): return request.param -@pytest.fixture(params=[None, ['some']]) +@pytest.fixture(params=[None, ["some"]]) def opprovides(request): return request.param diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 0856f3ca..6ac57ff3 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -16,7 +16,8 @@ def scream(*args, **kwargs): raise AssertionError( - "Must not have run!\n args: %s\n kwargs: %s", (args, kwargs)) + "Must not have run!\n args: %s\n kwargs: %s", (args, kwargs) + ) def identity(x): @@ -36,13 +37,13 @@ def filtdict(d, *keys): def test_network_smoke(): # Sum operation, late-bind compute function - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum_ab')(add) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum_ab")(add) # sum_op1 is callable assert sum_op1(1, 2) == 3 # Multiply operation, decorate in-place - @operation(name='mul_op1', needs=['sum_ab', 'b'], provides='sum_ab_times_b') + @operation(name="mul_op1", needs=["sum_ab", "b"], provides="sum_ab_times_b") def mul_op1(a, b): return a * b @@ -50,16 +51,23 @@ def 
mul_op1(a, b): assert mul_op1(1, 2) == 2 # Pow operation - @operation(name='pow_op1', needs='sum_ab', provides=['sum_ab_p1', 'sum_ab_p2', 'sum_ab_p3'], params={'exponent': 3}) + @operation( + name="pow_op1", + needs="sum_ab", + provides=["sum_ab_p1", "sum_ab_p2", "sum_ab_p3"], + params={"exponent": 3}, + ) def pow_op1(a, exponent=2): - return [math.pow(a, y) for y in range(1, exponent+1)] + return [math.pow(a, y) for y in range(1, exponent + 1)] # `_compute()` needs a` nx-DiGraph in op's `net` attribute. compose("mock graph")(pow_op1) - assert pow_op1._compute({'sum_ab':2}, ['sum_ab_p2']) == {'sum_ab_p2': 4.0} + assert pow_op1._compute({"sum_ab": 2}, ["sum_ab_p2"]) == {"sum_ab_p2": 4.0} # Partial operation that is bound at a later time - partial_op = operation(name='sum_op2', needs=['sum_ab_p1', 'sum_ab_p2'], provides='p1_plus_p2') + partial_op = operation( + name="sum_op2", needs=["sum_ab_p1", "sum_ab_p2"], provides="p1_plus_p2" + ) # Bind the partial operation sum_op2 = partial_op(add) @@ -67,36 +75,38 @@ def pow_op1(a, exponent=2): # Sum operation, early-bind compute function sum_op_factory = operation(add) - sum_op3 = sum_op_factory(name='sum_op3', needs=['a', 'b'], provides='sum_ab2') + sum_op3 = sum_op_factory(name="sum_op3", needs=["a", "b"], provides="sum_ab2") # sum_op3 is callable assert sum_op3(5, 6) == 11 # compose network - net = compose(name='my network')(sum_op1, mul_op1, pow_op1, sum_op2, sum_op3) + net = compose(name="my network")(sum_op1, mul_op1, pow_op1, sum_op2, sum_op3) # # Running the network # # get all outputs - exp = {'a': 1, - 'b': 2, - 'p1_plus_p2': 12.0, - 'sum_ab': 3, - 'sum_ab2': 3, - 'sum_ab_p1': 3.0, - 'sum_ab_p2': 9.0, - 'sum_ab_p3': 27.0, - 'sum_ab_times_b': 6} - assert net({'a': 1, 'b': 2}) == exp + exp = { + "a": 1, + "b": 2, + "p1_plus_p2": 12.0, + "sum_ab": 3, + "sum_ab2": 3, + "sum_ab_p1": 3.0, + "sum_ab_p2": 9.0, + "sum_ab_p3": 27.0, + "sum_ab_times_b": 6, + } + assert net({"a": 1, "b": 2}) == exp # get specific outputs - 
exp = {'sum_ab_times_b': 6} - assert net({'a': 1, 'b': 2}, outputs=["sum_ab_times_b"]) == exp + exp = {"sum_ab_times_b": 6} + assert net({"a": 1, "b": 2}, outputs=["sum_ab_times_b"]) == exp # start with inputs already computed - exp = {'sum_ab_times_b': 2} + exp = {"sum_ab_times_b": 2} assert net({"sum_ab": 1, "b": 2}, outputs=["sum_ab_times_b"]) == exp # visualize network graph @@ -105,48 +115,58 @@ def pow_op1(a, exponent=2): def test_network_simple_merge(): - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) - net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op2 = operation(name="sum_op2", needs=["a", "b"], provides="sum2")(add) + sum_op3 = operation(name="sum_op3", needs=["sum1", "c"], provides="sum3")(add) + net1 = compose(name="my network 1")(sum_op1, sum_op2, sum_op3) - exp = {'a': 1, 'b': 2, 'c': 4, 'sum1': 3, 'sum2': 3, 'sum3': 7} - sol = net1({'a': 1, 'b': 2, 'c': 4}) + exp = {"a": 1, "b": 2, "c": 4, "sum1": 3, "sum2": 3, "sum3": 7} + sol = net1({"a": 1, "b": 2, "c": 4}) assert sol == exp - sum_op4 = operation(name='sum_op1', needs=['d', 'e'], provides='a')(add) - sum_op5 = operation(name='sum_op2', needs=['a', 'f'], provides='b')(add) + sum_op4 = operation(name="sum_op1", needs=["d", "e"], provides="a")(add) + sum_op5 = operation(name="sum_op2", needs=["a", "f"], provides="b")(add) - net2 = compose(name='my network 2')(sum_op4, sum_op5) - exp = {'a': 3, 'b': 7, 'd': 1, 'e': 2, 'f': 4} - sol = net2({'d': 1, 'e': 2, 'f': 4}) + net2 = compose(name="my network 2")(sum_op4, sum_op5) + exp = {"a": 3, "b": 7, "d": 1, "e": 2, "f": 4} + sol = net2({"d": 1, "e": 2, "f": 4}) assert sol == exp - net3 = compose(name='merged')(net1, net2) - exp = {'a': 3, 'b': 7, 'c': 5, 'd': 1, 'e': 2, 
'f': 4, 'sum1': 10, 'sum2': 10, 'sum3': 15} - sol = net3({'c': 5, 'd': 1, 'e': 2, 'f': 4}) + net3 = compose(name="merged")(net1, net2) + exp = { + "a": 3, + "b": 7, + "c": 5, + "d": 1, + "e": 2, + "f": 4, + "sum1": 10, + "sum2": 10, + "sum3": 15, + } + sol = net3({"c": 5, "d": 1, "e": 2, "f": 4}) assert sol == exp def test_network_deep_merge(): - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['sum1', 'c'], provides='sum3')(add) - net1 = compose(name='my network 1')(sum_op1, sum_op2, sum_op3) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op2 = operation(name="sum_op2", needs=["a", "b"], provides="sum2")(add) + sum_op3 = operation(name="sum_op3", needs=["sum1", "c"], provides="sum3")(add) + net1 = compose(name="my network 1")(sum_op1, sum_op2, sum_op3) - exp = {'a': 1, 'b': 2, 'c': 4, 'sum1': 3, 'sum2': 3, 'sum3': 7} - assert net1({'a': 1, 'b': 2, 'c': 4}) == exp + exp = {"a": 1, "b": 2, "c": 4, "sum1": 3, "sum2": 3, "sum3": 7} + assert net1({"a": 1, "b": 2, "c": 4}) == exp - sum_op4 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op5 = operation(name='sum_op4', needs=['sum1', 'b'], provides='sum2')(add) - net2 = compose(name='my network 2')(sum_op4, sum_op5) - exp = {'a': 1, 'b': 2, 'sum1': 3, 'sum2': 5} - assert net2({'a': 1, 'b': 2}) == exp + sum_op4 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op5 = operation(name="sum_op4", needs=["sum1", "b"], provides="sum2")(add) + net2 = compose(name="my network 2")(sum_op4, sum_op5) + exp = {"a": 1, "b": 2, "sum1": 3, "sum2": 5} + assert net2({"a": 1, "b": 2}) == exp - net3 = compose(name='merged', merge=True)(net1, net2) - exp = {'a': 1, 'b': 2, 'c': 4, 'sum1': 3, 'sum2': 3, 'sum3': 7} - assert net3({'a': 1, 'b': 2, 'c': 4}) == exp + net3 = compose(name="merged", merge=True)(net1, 
net2) + exp = {"a": 1, "b": 2, "c": 4, "sum1": 3, "sum2": 3, "sum3": 7} + assert net3({"a": 1, "b": 2, "c": 4}) == exp def test_network_merge_in_doctests(): @@ -157,13 +177,17 @@ def abspow(a, p): graphop = compose(name="graphop")( operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), - operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3}) - (abspow) + operation( + name="abspow1", + needs=["a_minus_ab"], + provides=["abs_a_minus_ab_cubed"], + params={"p": 3}, + )(abspow), ) another_graph = compose(name="another_graph")( - operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), - operation(name="mul2", needs=["c", "ab"], provides=["cab"])(mul) + operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + operation(name="mul2", needs=["c", "ab"], provides=["cab"])(mul), ) merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) assert merged_graph.needs @@ -179,16 +203,16 @@ def test_input_based_pruning(): # Set up a net such that if sum1 and sum2 are provided directly, we don't # need to provide a and b. - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['a', 'b'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['sum1', 'sum2'], provides='sum3')(add) - net = compose(name='test_net')(sum_op1, sum_op2, sum_op3) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op2 = operation(name="sum_op2", needs=["a", "b"], provides="sum2")(add) + sum_op3 = operation(name="sum_op3", needs=["sum1", "sum2"], provides="sum3")(add) + net = compose(name="test_net")(sum_op1, sum_op2, sum_op3) - results = net({'sum1': sum1, 'sum2': sum2}) + results = net({"sum1": sum1, "sum2": sum2}) # Make sure we got expected result without having to pass a or b. 
- assert 'sum3' in results - assert results['sum3'] == add(sum1, sum2) + assert "sum3" in results + assert results["sum3"] == add(sum1, sum2) def test_output_based_pruning(): @@ -200,16 +224,16 @@ def test_output_based_pruning(): # Set up a network such that we don't need to provide a or b if we only # request sum3 as output. - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['c', 'd'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['c', 'sum2'], provides='sum3')(add) - net = compose(name='test_net')(sum_op1, sum_op2, sum_op3) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op2 = operation(name="sum_op2", needs=["c", "d"], provides="sum2")(add) + sum_op3 = operation(name="sum_op3", needs=["c", "sum2"], provides="sum3")(add) + net = compose(name="test_net")(sum_op1, sum_op2, sum_op3) - results = net({'a': 0, 'b': 0, 'c': c, 'd': d}, outputs=['sum3']) + results = net({"a": 0, "b": 0, "c": c, "d": d}, outputs=["sum3"]) # Make sure we got expected result without having to pass a or b. - assert 'sum3' in results - assert results['sum3'] == add(c, add(c, d)) + assert "sum3" in results + assert results["sum3"] == add(c, add(c, d)) def test_input_output_based_pruning(): @@ -222,16 +246,16 @@ def test_input_output_based_pruning(): # Set up a network such that we don't need to provide a or b d if we only # request sum3 as output and if we provide sum2. 
- sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['c', 'd'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['c', 'sum2'], provides='sum3')(add) - net = compose(name='test_net')(sum_op1, sum_op2, sum_op3) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op2 = operation(name="sum_op2", needs=["c", "d"], provides="sum2")(add) + sum_op3 = operation(name="sum_op3", needs=["c", "sum2"], provides="sum3")(add) + net = compose(name="test_net")(sum_op1, sum_op2, sum_op3) - results = net({'c': c, 'sum2': sum2}, outputs=['sum3']) + results = net({"c": c, "sum2": sum2}, outputs=["sum3"]) # Make sure we got expected result without having to pass a, b, or d. - assert 'sum3' in results - assert results['sum3'] == add(c, sum2) + assert "sum3" in results + assert results["sum3"] == add(c, sum2) def test_pruning_raises_for_bad_output(): @@ -240,17 +264,17 @@ def test_pruning_raises_for_bad_output(): # Set up a network that doesn't have the output sum4, which we'll request # later. - sum_op1 = operation(name='sum_op1', needs=['a', 'b'], provides='sum1')(add) - sum_op2 = operation(name='sum_op2', needs=['c', 'd'], provides='sum2')(add) - sum_op3 = operation(name='sum_op3', needs=['c', 'sum2'], provides='sum3')(add) - net = compose(name='test_net')(sum_op1, sum_op2, sum_op3) + sum_op1 = operation(name="sum_op1", needs=["a", "b"], provides="sum1")(add) + sum_op2 = operation(name="sum_op2", needs=["c", "d"], provides="sum2")(add) + sum_op3 = operation(name="sum_op3", needs=["c", "sum2"], provides="sum3")(add) + net = compose(name="test_net")(sum_op1, sum_op2, sum_op3) # Request two outputs we can compute and one we can't compute. Assert # that this raises a ValueError. 
with pytest.raises(ValueError) as exinfo: - net({'a': 1, 'b': 2, 'c': 3, 'd': 4}, - outputs=['sum1', 'sum3', 'sum4']) - assert exinfo.match('sum4') + net({"a": 1, "b": 2, "c": 3, "d": 4}, outputs=["sum1", "sum3", "sum4"]) + assert exinfo.match("sum4") + def test_pruning_not_overrides_given_intermediate(): # Test #25: v1.2.4 overwrites intermediate data when no output asked @@ -286,7 +310,7 @@ def test_pruning_not_overrides_given_intermediate(): pipeline.set_execution_method("parallel") overwrites = {} pipeline.set_overwrites_collector(overwrites) - #assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") + # assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") assert overwrites == {} # unjust must have been pruned overwrites = {} @@ -299,8 +323,9 @@ def test_pruning_multiouts_not_override_intermediates1(): # Test #25: v.1.2.4 overwrites intermediate data when a previous operation # must run for its other outputs (outputs asked or not) pipeline = compose(name="pipeline")( - operation(name="must run", needs=["a"], provides=["overriden", "calced"]) - (lambda x: (x, 2 * x)), + operation(name="must run", needs=["a"], provides=["overriden", "calced"])( + lambda x: (x, 2 * x) + ), operation(name="add", needs=["overriden", "calced"], provides=["asked"])(add), ) @@ -322,12 +347,12 @@ def test_pruning_multiouts_not_override_intermediates1(): overwrites = {} pipeline.set_overwrites_collector(overwrites) assert pipeline({"a": 5, "overriden": 1}) == exp - assert overwrites == {'overriden': 5} + assert overwrites == {"overriden": 5} overwrites = {} pipeline.set_overwrites_collector(overwrites) assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") - assert overwrites == {'overriden': 5} + assert overwrites == {"overriden": 5} ## Test parallel # @@ -342,8 +367,9 @@ def test_pruning_multiouts_not_override_intermediates2(): # SPURIOUS FAILS in < PY3.6 due to unordered dicts, # eg https://travis-ci.org/ankostis/graphkit/jobs/594813119 pipeline = 
compose(name="pipeline")( - operation(name="must run", needs=["a"], provides=["overriden", "e"]) - (lambda x: (x, 2 * x)), + operation(name="must run", needs=["a"], provides=["overriden", "e"])( + lambda x: (x, 2 * x) + ), operation(name="op1", needs=["overriden", "c"], provides=["d"])(add), operation(name="op2", needs=["d", "e"], provides=["asked"])(mul), ) @@ -366,12 +392,12 @@ def test_pruning_multiouts_not_override_intermediates2(): overwrites = {} pipeline.set_overwrites_collector(overwrites) assert pipeline(inputs) == exp - assert overwrites == {'overriden': 5} + assert overwrites == {"overriden": 5} overwrites = {} pipeline.set_overwrites_collector(overwrites) assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") - assert overwrites == {'overriden': 5} + assert overwrites == {"overriden": 5} ## Test parallel # @@ -396,7 +422,9 @@ def test_pruning_with_given_intermediate_and_asked_out(): # - on v1.2.4 with KeyError: 'a', # - on #18 (unsatisfied) with no result. # FIXED on #18+#26 (new dag solver). 
- assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict( + exp, "asked" + ) ## Test OVERWITES # @@ -407,7 +435,9 @@ def test_pruning_with_given_intermediate_and_asked_out(): overwrites = {} pipeline.set_overwrites_collector(overwrites) - assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict( + exp, "asked" + ) assert overwrites == {} ## Test parallel @@ -415,7 +445,10 @@ def test_pruning_with_given_intermediate_and_asked_out(): # pipeline.set_execution_method("parallel") assert pipeline({"given-1": 5, "b": 2, "given-2": 2}) == exp - assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict(exp, "asked") + assert pipeline({"given-1": 5, "b": 2, "given-2": 2}, ["asked"]) == filtdict( + exp, "asked" + ) + def test_unsatisfied_operations(): # Test that operations with partial inputs are culled and not failing. @@ -443,6 +476,7 @@ def test_unsatisfied_operations(): assert pipeline({"a": 10, "b2": 2}) == exp assert pipeline({"a": 10, "b2": 2}, outputs=["a-b2"]) == filtdict(exp, "a-b2") + def test_unsatisfied_operations_same_out(): # Test unsatisfied pairs of operations providing the same output. 
pipeline = compose(name="pipeline")( @@ -453,11 +487,15 @@ def test_unsatisfied_operations_same_out(): exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp - assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict( + exp, "ab_plus_c" + ) exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp - assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict( + exp, "ab_plus_c" + ) ## Test parallel # @@ -465,12 +503,16 @@ def test_unsatisfied_operations_same_out(): pipeline.set_execution_method("parallel") exp = {"a": 10, "b1": 2, "c": 1, "ab": 20, "ab_plus_c": 21} assert pipeline({"a": 10, "b1": 2, "c": 1}) == exp - assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + assert pipeline({"a": 10, "b1": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict( + exp, "ab_plus_c" + ) # # FAIL! in #26 exp = {"a": 10, "b2": 2, "c": 1, "ab": 5, "ab_plus_c": 6} assert pipeline({"a": 10, "b2": 2, "c": 1}) == exp - assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict(exp, "ab_plus_c") + assert pipeline({"a": 10, "b2": 2, "c": 1}, outputs=["ab_plus_c"]) == filtdict( + exp, "ab_plus_c" + ) def test_optional(): @@ -480,21 +522,24 @@ def test_optional(): def addplusplus(a, b, c=0): return a + b + c - sum_op = operation(name='sum_op1', needs=['a', 'b', modifiers.optional('c')], provides='sum')(addplusplus) + sum_op = operation( + name="sum_op1", needs=["a", "b", modifiers.optional("c")], provides="sum" + )(addplusplus) - net = compose(name='test_net')(sum_op) + net = compose(name="test_net")(sum_op) # Make sure output with optional arg is as expected. 
- named_inputs = {'a': 4, 'b': 3, 'c': 2} + named_inputs = {"a": 4, "b": 3, "c": 2} results = net(named_inputs) - assert 'sum' in results - assert results['sum'] == sum(named_inputs.values()) + assert "sum" in results + assert results["sum"] == sum(named_inputs.values()) # Make sure output without optional arg is as expected. - named_inputs = {'a': 4, 'b': 3} + named_inputs = {"a": 4, "b": 3} results = net(named_inputs) - assert 'sum' in results - assert results['sum'] == sum(named_inputs.values()) + assert "sum" in results + assert results["sum"] == sum(named_inputs.values()) + def test_sideffects(): # Function without return value. @@ -506,32 +551,36 @@ def increment(box): box[i] += 1 # Designate `a`, `b` as sideffect inp/out arguments. - graph = compose('mygraph')( + graph = compose("mygraph")( operation( - name='extend', - needs=['box', modifiers.sideffect('a')], - provides=[modifiers.sideffect('b')])(extend), + name="extend", + needs=["box", modifiers.sideffect("a")], + provides=[modifiers.sideffect("b")], + )(extend), operation( - name='increment', - needs=['box', modifiers.sideffect('b')], - provides=modifiers.sideffect('c'))(increment), + name="increment", + needs=["box", modifiers.sideffect("b")], + provides=modifiers.sideffect("c"), + )(increment), ) - assert graph({'box': [0], 'a': True})['box'] == [1, 2, 3] + assert graph({"box": [0], "a": True})["box"] == [1, 2, 3] # Reverse order of functions. 
- graph = compose('mygraph')( + graph = compose("mygraph")( operation( - name='increment', - needs=['box', modifiers.sideffect('a')], - provides=modifiers.sideffect('b'))(increment), + name="increment", + needs=["box", modifiers.sideffect("a")], + provides=modifiers.sideffect("b"), + )(increment), operation( - name='extend', - needs=['box', modifiers.sideffect('b')], - provides=[modifiers.sideffect('c')])(extend), + name="extend", + needs=["box", modifiers.sideffect("b")], + provides=[modifiers.sideffect("c")], + )(extend), ) - assert graph({'box': [0], 'a': None})['box'] == [1, 1, 2] + assert graph({"box": [0], "a": None})["box"] == [1, 1, 2] def test_optional_per_function_with_same_output(): @@ -540,60 +589,60 @@ def test_optional_per_function_with_same_output(): ## ATTENTION, the selected function is NOT the one with more inputs # but the 1st satisfiable function added in the network. - add_op = operation(name='add', needs=['a', 'b'], provides='a+-b')(add) + add_op = operation(name="add", needs=["a", "b"], provides="a+-b")(add) sub_op_optional = operation( - name='sub_opt', needs=['a', modifiers.optional('b')], provides='a+-b' + name="sub_opt", needs=["a", modifiers.optional("b")], provides="a+-b" )(lambda a, b=10: a - b) # Normal order # - pipeline = compose(name='partial_optionals')(add_op, sub_op_optional) + pipeline = compose(name="partial_optionals")(add_op, sub_op_optional) # - named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': 3, 'b': 2} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': 3} + named_inputs = {"a": 1, "b": 2} + assert pipeline(named_inputs) == {"a": 1, "a+-b": 3, "b": 2} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": 3} # - named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} + named_inputs = {"a": 1} + assert pipeline(named_inputs) == {"a": 1, "a+-b": -9} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -9} # 
Inverse op order # - pipeline = compose(name='partial_optionals')(sub_op_optional, add_op) + pipeline = compose(name="partial_optionals")(sub_op_optional, add_op) # - named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': -1, 'b': 2} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -1} + named_inputs = {"a": 1, "b": 2} + assert pipeline(named_inputs) == {"a": 1, "a+-b": -1, "b": 2} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -1} # - named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} + named_inputs = {"a": 1} + assert pipeline(named_inputs) == {"a": 1, "a+-b": -9} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -9} # PARALLEL + Normal order # - pipeline = compose(name='partial_optionals')(add_op, sub_op_optional) + pipeline = compose(name="partial_optionals")(add_op, sub_op_optional) pipeline.set_execution_method("parallel") # - named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': 3, 'b': 2} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': 3} + named_inputs = {"a": 1, "b": 2} + assert pipeline(named_inputs) == {"a": 1, "a+-b": 3, "b": 2} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": 3} # - named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} + named_inputs = {"a": 1} + assert pipeline(named_inputs) == {"a": 1, "a+-b": -9} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -9} # PARALLEL + Inverse op order # - pipeline = compose(name='partial_optionals')(sub_op_optional, add_op) + pipeline = compose(name="partial_optionals")(sub_op_optional, add_op) pipeline.set_execution_method("parallel") # - named_inputs = {'a': 1, 'b': 2} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': -1, 'b': 2} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -1} + named_inputs = {"a": 1, "b": 2} + assert 
pipeline(named_inputs) == {"a": 1, "a+-b": -1, "b": 2} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -1} # - named_inputs = {'a': 1} - assert pipeline(named_inputs) == {'a': 1, 'a+-b': -9} - assert pipeline(named_inputs, ['a+-b']) == {'a+-b': -9} + named_inputs = {"a": 1} + assert pipeline(named_inputs) == {"a": 1, "a+-b": -9} + assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -9} def test_deleted_optional(): @@ -605,13 +654,15 @@ def addplusplus(a, b, c=0): return a + b + c # Here, a DeleteInstruction will be inserted for the optional need 'c'. - sum_op1 = operation(name='sum_op1', needs=['a', 'b', modifiers.optional('c')], provides='sum1')(addplusplus) - sum_op2 = operation(name='sum_op2', needs=['sum1', 'sum1'], provides='sum2')(add) - net = compose(name='test_net')(sum_op1, sum_op2) + sum_op1 = operation( + name="sum_op1", needs=["a", "b", modifiers.optional("c")], provides="sum1" + )(addplusplus) + sum_op2 = operation(name="sum_op2", needs=["sum1", "sum1"], provides="sum2")(add) + net = compose(name="test_net")(sum_op1, sum_op2) # DeleteInstructions are used only when a subset of outputs are requested. 
- results = net({'a': 4, 'b': 3}, outputs=['sum2']) - assert 'sum2' in results + results = net({"a": 4, "b": 3}, outputs=["sum2"]) + assert "sum2" in results def test_deleteinstructs_vary_with_inputs(): @@ -622,12 +673,16 @@ def count_deletions(steps): pipeline = compose(name="pipeline")( operation(name="a free without b", needs=["a"], provides=["aa"])(identity), operation(name="satisfiable", needs=["a", "b"], provides=["ab"])(add), - operation(name="optional ab", needs=["aa", modifiers.optional("ab")], provides=["asked"]) - (lambda a, ab=10: a + ab), + operation( + name="optional ab", + needs=["aa", modifiers.optional("ab")], + provides=["asked"], + )(lambda a, ab=10: a + ab), ) inp = {"a": 2, "b": 3} - exp = inp.copy(); exp.update({"aa": 2, "ab": 5, "asked": 7}) + exp = inp.copy() + exp.update({"aa": 2, "ab": 5, "asked": 7}) res = pipeline(inp) assert res == exp # ok steps11 = pipeline.compile(inp).steps @@ -636,7 +691,8 @@ def count_deletions(steps): steps12 = pipeline.compile(inp, ["asked"]).steps inp = {"a": 2} - exp = inp.copy(); exp.update({"aa": 2, "asked": 12}) + exp = inp.copy() + exp.update({"aa": 2, "asked": 12}) res = pipeline(inp) assert res == exp # ok steps21 = pipeline.compile(inp).steps @@ -672,10 +728,10 @@ def fn(x): print("fn %s" % (time.time() - t0)) return 1 + x - def fn2(a,b): + def fn2(a, b): time.sleep(delay) print("fn2 %s" % (time.time() - t0)) - return a+b + return a + b def fn3(z, k=1): time.sleep(delay) @@ -683,23 +739,15 @@ def fn3(z, k=1): return z + k pipeline = compose(name="l", merge=True)( - # the following should execute in parallel under threaded execution mode operation(name="a", needs="x", provides="ao")(fn), operation(name="b", needs="x", provides="bo")(fn), - # this should execute after a and b have finished operation(name="c", needs=["ao", "bo"], provides="co")(fn2), - - operation(name="d", - needs=["ao", modifiers.optional("k")], - provides="do")(fn3), - + operation(name="d", needs=["ao", modifiers.optional("k")], 
provides="do")(fn3), operation(name="e", needs=["ao", "bo"], provides="eo")(fn2), operation(name="f", needs="eo", provides="fo")(fn), - operation(name="g", needs="fo", provides="go")(fn) - - + operation(name="g", needs="fo", provides="go")(fn), ) t0 = time.time() @@ -717,6 +765,7 @@ def fn3(z, k=1): # make sure results are the same using either method assert result_sequential == result_threaded + @pytest.mark.slow def test_multi_threading(): import time @@ -724,28 +773,31 @@ def test_multi_threading(): from multiprocessing.dummy import Pool def op_a(a, b): - time.sleep(random.random()*.02) - return a+b + time.sleep(random.random() * 0.02) + return a + b def op_b(c, b): - time.sleep(random.random()*.02) - return c+b + time.sleep(random.random() * 0.02) + return c + b def op_c(a, b): - time.sleep(random.random()*.02) - return a*b + time.sleep(random.random() * 0.02) + return a * b pipeline = compose(name="pipeline", merge=True)( - operation(name="op_a", needs=['a', 'b'], provides='c')(op_a), - operation(name="op_b", needs=['c', 'b'], provides='d')(op_b), - operation(name="op_c", needs=['a', 'b'], provides='e')(op_c), + operation(name="op_a", needs=["a", "b"], provides="c")(op_a), + operation(name="op_b", needs=["c", "b"], provides="d")(op_b), + operation(name="op_c", needs=["a", "b"], provides="e")(op_c), ) def infer(i): # data = open("616039-bradpitt.jpg").read() outputs = ["c", "d", "e"] - results = pipeline({"a": 1, "b":2}, outputs) - assert tuple(sorted(results.keys())) == tuple(sorted(outputs)), (outputs, results) + results = pipeline({"a": 1, "b": 2}, outputs) + assert tuple(sorted(results.keys())) == tuple(sorted(outputs)), ( + outputs, + results, + ) return results N = 33 @@ -763,30 +815,27 @@ def infer(i): # We first define some basic operations class Sum(Operation): - def compute(self, inputs): a = inputs[0] b = inputs[1] - return [a+b] + return [a + b] class Mul(Operation): - def compute(self, inputs): a = inputs[0] b = inputs[1] - return [a*b] + return [a 
* b] # This is an example of an operation that takes a parameter. # It also illustrates an operation that returns multiple outputs class Pow(Operation): - def compute(self, inputs): a = inputs[0] outputs = [] - for y in range(1, self.params['exponent']+1): + for y in range(1, self.params["exponent"] + 1): p = math.pow(a, y) outputs.append(p) return outputs @@ -794,26 +843,16 @@ def compute(self, inputs): def test_backwards_compatibility(): - sum_op1 = Sum( - name="sum_op1", - provides=["sum_ab"], - needs=["a", "b"] - ) - mul_op1 = Mul( - name="mul_op1", - provides=["sum_ab_times_b"], - needs=["sum_ab", "b"] - ) + sum_op1 = Sum(name="sum_op1", provides=["sum_ab"], needs=["a", "b"]) + mul_op1 = Mul(name="mul_op1", provides=["sum_ab_times_b"], needs=["sum_ab", "b"]) pow_op1 = Pow( name="pow_op1", needs=["sum_ab"], provides=["sum_ab_p1", "sum_ab_p2", "sum_ab_p3"], - params={"exponent": 3} + params={"exponent": 3}, ) sum_op2 = Sum( - name="sum_op2", - provides=["p1_plus_p2"], - needs=["sum_ab_p1", "sum_ab_p2"], + name="sum_op2", provides=["p1_plus_p2"], needs=["sum_ab_p1", "sum_ab_p2"] ) net = network.Network() @@ -831,20 +870,25 @@ def test_backwards_compatibility(): # # get all outputs - exp = {'a': 1, - 'b': 2, - 'p1_plus_p2': 12.0, - 'sum_ab': 3, - 'sum_ab_p1': 3.0, - 'sum_ab_p2': 9.0, - 'sum_ab_p3': 27.0, - 'sum_ab_times_b': 6} - assert net.compute(outputs=None, named_inputs={'a': 1, 'b': 2}) == exp + exp = { + "a": 1, + "b": 2, + "p1_plus_p2": 12.0, + "sum_ab": 3, + "sum_ab_p1": 3.0, + "sum_ab_p2": 9.0, + "sum_ab_p3": 27.0, + "sum_ab_times_b": 6, + } + assert net.compute(outputs=None, named_inputs={"a": 1, "b": 2}) == exp # get specific outputs - exp = {'sum_ab_times_b': 6} - assert net.compute(outputs=["sum_ab_times_b"], named_inputs={'a': 1, 'b': 2}) == exp + exp = {"sum_ab_times_b": 6} + assert net.compute(outputs=["sum_ab_times_b"], named_inputs={"a": 1, "b": 2}) == exp # start with inputs already computed - exp = {'sum_ab_times_b': 2} - assert 
net.compute(outputs=["sum_ab_times_b"], named_inputs={"sum_ab": 1, "b": 2}) == exp + exp = {"sum_ab_times_b": 2} + assert ( + net.compute(outputs=["sum_ab_times_b"], named_inputs={"sum_ab": 1, "b": 2}) + == exp + ) From 94a7626e37861f5ffe8e11c9d1a80670a0346241 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Wed, 9 Oct 2019 22:46:52 +0300 Subject: [PATCH 120/167] DOC(api): automodules --- docs/source/graph_composition.rst | 2 +- docs/source/index.rst | 1 + docs/source/reference.rst | 31 +++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 docs/source/reference.rst diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index ebb85f65..e8c66d18 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -163,4 +163,4 @@ or the ``net`` that compiled it. Execution internals ------------------- .. automodule:: graphkit.network - + :noindex: diff --git a/docs/source/index.rst b/docs/source/index.rst index 3b8d54e5..f96b03e2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,6 +29,7 @@ Such graphs are useful in computer vision, machine learning, and many other doma operations graph_composition changes + reference .. _quick-start: diff --git a/docs/source/reference.rst b/docs/source/reference.rst new file mode 100644 index 00000000..a64b29c7 --- /dev/null +++ b/docs/source/reference.rst @@ -0,0 +1,31 @@ +============= +API Reference +============= + +Module: `base` +============== + +.. automodule:: graphkit.base + :members: + :undoc-members: + +Module: `functional` +==================== + +.. automodule:: graphkit.functional + :members: + :undoc-members: + +Module: `network` +================= + +.. automodule:: graphkit.network + :members: + :undoc-members: + +Module: `plot` +============== + +.. 
automodule:: graphkit.plot + :members: + :undoc-members: From f68220b22f54ea3b85fb2b1d9d21669ad027040a Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 10 Oct 2019 00:49:00 +0300 Subject: [PATCH 121/167] enh(api): import also modifiers from base package --- graphkit/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/graphkit/__init__.py b/graphkit/__init__.py index 8ceca9be..e95c9bc4 100644 --- a/graphkit/__init__.py +++ b/graphkit/__init__.py @@ -5,6 +5,7 @@ __version__ = "1.3.0" from .functional import operation, compose +from .modifiers import * # noqa, on purpose to include any new modifiers # For backwards compatibility from .base import Operation From cb3297ffb0a7352051f45865622dd5a3cbab26c0 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 10 Oct 2019 12:14:16 +0300 Subject: [PATCH 122/167] doc: add project coords in package __strings__ --- graphkit/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/graphkit/__init__.py b/graphkit/__init__.py index e95c9bc4..0ee2bfdc 100644 --- a/graphkit/__init__.py +++ b/graphkit/__init__.py @@ -1,8 +1,13 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
+"""Lightweight computation graphs for Python.""" __author__ = "hnguyen" __version__ = "1.3.0" +__license__ = "Apache-2.0" +__title__ = "graphkit" +__summary__ = __doc__.splitlines()[0] +__uri__ = "https://github.com/yahoo/graphkit" from .functional import operation, compose from .modifiers import * # noqa, on purpose to include any new modifiers From e67ee96de11661dc7f19c748dd0a7c9ac20a34bc Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Thu, 10 Oct 2019 12:27:08 +0300 Subject: [PATCH 123/167] fix(TC): pytest mark xfail(PY<36) due to unstable dicts --- test/test_graphkit.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 6ac57ff3..d48ed8bd 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -3,6 +3,7 @@ import math import pickle +import sys from operator import add, floordiv, mul, sub from pprint import pprint @@ -361,6 +362,11 @@ def test_pruning_multiouts_not_override_intermediates1(): assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") +@pytest.mark.xfail( + sys.version_info < (3, 6), + reason="PY3.5- have unstable dicts." + "E.g. https://travis-ci.org/ankostis/graphkit/jobs/595841023", +) def test_pruning_multiouts_not_override_intermediates2(): # Test #25: v.1.2.4 overrides intermediate data when a previous operation # must run for its other outputs (outputs asked or not) @@ -583,6 +589,11 @@ def increment(box): assert graph({"box": [0], "a": None})["box"] == [1, 1, 2] +@pytest.mark.xfail( + sys.version_info < (3, 6), + reason="PY3.5- have unstable dicts." + "E.g. https://travis-ci.org/ankostis/graphkit/jobs/595793872", +) def test_optional_per_function_with_same_output(): # Test that the same need can be both optional and not on different operations. # From ac7ec466645fbb6a874e33a2daacd01312ec8f63 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 05:27:15 +0300 Subject: [PATCH 124/167] enh(ops): __repr__ listify needs/provides... 
so as to politely print IndexedSets in those attributes. --- docs/source/graph_composition.rst | 5 +++-- graphkit/base.py | 10 ++++++++-- graphkit/functional.py | 10 ++++++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index e8c66d18..ab97fe9c 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -123,8 +123,9 @@ We can merge ``graphop`` and ``another_graph`` like so, avoiding a redundant ``m >>> merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) >>> print(merged_graph) - NetworkOperation(name='merged_graph', needs=IndexedSet(['a', 'b', 'c']), - provides=IndexedSet(['ab', 'a_minus_ab', 'abs_a_minus_ab_cubed', 'cab'])) + NetworkOperation(name='merged_graph', + needs=['a', 'b', 'c'], + provides=['ab', 'a_minus_ab', 'abs_a_minus_ab_cubed', 'cab']) This ``merged_graph`` will look like this: diff --git a/graphkit/base.py b/graphkit/base.py index 116d30a5..c04c8485 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -154,11 +154,17 @@ def __repr__(self): """ Display more informative names for the Operation class """ + + def aslist(i): + if i and not isinstance(i, str): + return list(i) + return i + return u"%s(name='%s', needs=%s, provides=%s)" % ( self.__class__.__name__, getattr(self, "name", None), - getattr(self, "needs", None), - getattr(self, "provides", None), + aslist(getattr(self, "needs", None)), + aslist(getattr(self, "provides", None)), ) diff --git a/graphkit/functional.py b/graphkit/functional.py index 2fb0d29d..d5b7c4b0 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -155,13 +155,19 @@ def __repr__(self): """ Display more informative names for the Operation class """ + + def aslist(i): + if i and not isinstance(i, str): + return list(i) + return i + func_name = getattr(self, "fn") func_name = func_name and getattr(func_name, "__name__", None) return u"%s(name='%s', needs=%s, 
provides=%s, fn=%s)" % ( self.__class__.__name__, getattr(self, "name", None), - getattr(self, "needs", None), - getattr(self, "provides", None), + aslist(getattr(self, "needs", None)), + aslist(getattr(self, "provides", None)), func_name, ) From ddf9b598803b90a98b26983d7fdb9068de46973a Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 05:31:24 +0300 Subject: [PATCH 125/167] enh(modifiers): no __slots__, repr() tell their class --- docs/source/graph_composition.rst | 2 +- graphkit/modifiers.py | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index ab97fe9c..2e067130 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -124,7 +124,7 @@ We can merge ``graphop`` and ``another_graph`` like so, avoiding a redundant ``m >>> merged_graph = compose(name="merged_graph", merge=True)(graphop, another_graph) >>> print(merged_graph) NetworkOperation(name='merged_graph', - needs=['a', 'b', 'c'], + needs=[optional('a'), optional('b'), optional('c')], provides=['ab', 'a_minus_ab', 'abs_a_minus_ab_cubed', 'cab']) This ``merged_graph`` will look like this: diff --git a/graphkit/modifiers.py b/graphkit/modifiers.py index 18c75fec..e38c2615 100644 --- a/graphkit/modifiers.py +++ b/graphkit/modifiers.py @@ -20,8 +20,7 @@ class optional(str): Here is an example of an operation that uses an optional argument:: - >>> from graphkit import operation, compose - >>> from graphkit.modifiers import optional + >>> from graphkit import operation, compose, optional >>> # Function that adds either two or three numbers. >>> def myadd(a, b, c=0): @@ -31,7 +30,11 @@ class optional(str): >>> graph = compose('mygraph')( ... operation(name='myadd', needs=['a', 'b', optional('c')], provides='sum')(myadd) ... 
) - + >>> graph + NetworkOperation(name='mygraph', + needs=[optional('a'), optional('b'), optional('c')], + provides=['sum']) + >>> # The graph works with and without 'c' provided as input. >>> graph({'a': 5, 'b': 2, 'c': 4})['sum'] 11 @@ -40,7 +43,10 @@ class optional(str): """ - pass + __slots__ = () # avoid __dict__ on instances + + def __repr__(self): + return "optional('%s')" % self class sideffect(str): @@ -61,8 +67,7 @@ class sideffect(str): A typical use case is to signify columns required to produce new ones in pandas dataframes:: - >>> from graphkit import operation, compose - >>> from graphkit.modifiers import sideffect + >>> from graphkit import operation, compose, sideffect >>> # Function appending a new dataframe column from two pre-existing ones. >>> def addcolumns(df): @@ -75,6 +80,8 @@ class sideffect(str): ... needs=['df', sideffect('a'), sideffect('b')], ... provides=[sideffect('sum')])(addcolumns) ... ) + >>> graph + NetworkOperation(name='mygraph', needs=[optional('df'), optional('a'), optional('b')], provides=[sideffect('sum')]) >>> # The graph works with and without 'c' provided as input. >>> df = pd.DataFrame({'a': [5], 'b': [2]}) # doctest: +SKIP @@ -83,4 +90,7 @@ class sideffect(str): """ - pass + __slots__ = () # avoid __dict__ on instances + + def __repr__(self): + return "sideffect('%s')" % self From 6c0e15c2ecfb84f3a108ce9ee94d6297466605e3 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 06:20:21 +0300 Subject: [PATCH 126/167] REVERT(#20, 94b7291): not asking edges for OPTIONALs ... but still annotate edges with optional edge data-attribute. Reverted bc Operation must not know its network, to belong to more than one. Also the `Operation.net` contradicted `NetwotkOperation.net`; the later indeed is the network it wraps (not the net it is part of). 
--- CHANGES.rst | 6 ++++-- graphkit/base.py | 4 ---- graphkit/functional.py | 7 ++----- graphkit/network.py | 4 ---- test/test_graphkit.py | 2 -- 5 files changed, 6 insertions(+), 17 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 102ae8b2..fbfb5238 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,8 +21,8 @@ Network: The end result is that operations in the graph that do not have all inputs satisfied, they are skipped (in v1.2.4 they crashed). - Also started annotating edges with optionals, to make proper use of the underlying - ``networkx`` graph. + Also started annotating edges with optional/sideffects, to make proper use of + the underlying ``networkx`` graph. |v130-flowchart| @@ -109,6 +109,8 @@ Chore & Docs: - enh(:gh:`30`): added "API reference' chapter. - drop(build): ``sphinx_rtd_theme`` library is the default theme for Sphinx now. - enh(build): Add ``test`` *pip extras*. +- sound: https://www.youtube.com/watch?v=-527VazA4IQ, + https://www.youtube.com/watch?v=8J182LRi8sU&t=43s diff --git a/graphkit/base.py b/graphkit/base.py index c04c8485..d8e77ca2 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -35,10 +35,6 @@ class Operation(object): specific application. """ - #: Owning :class:`~.network.Network`, set when added in a network. - #: Needed by `_compute()` to detect *optional needs* from edge-attributes. - net = None - def __init__(self, **kwargs): """ Create a new layer instance. diff --git a/graphkit/functional.py b/graphkit/functional.py index d5b7c4b0..9506a3bd 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -14,13 +14,10 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): - assert self.net - inputs = [ named_inputs[n] for n in self.needs - if "optional" not in self.net.graph.get_edge_data(n, self) - and not isinstance(n, sideffect) + if not isinstance(n, optional) and not isinstance(n, sideffect) ] # Find any optional inputs in named_inputs. 
Get only the ones that @@ -28,7 +25,7 @@ def _compute(self, named_inputs, outputs=None): optionals = { n: named_inputs[n] for n in self.needs - if "optional" in self.net.graph.get_edge_data(n, self) and n in named_inputs + if isinstance(n, optional) and n in named_inputs } # Combine params and optionals into one big glob of keyword arguments. diff --git a/graphkit/network.py b/graphkit/network.py index 3ca6d0fb..2493a85f 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -184,10 +184,6 @@ def add_op(self, operation): self._cached_plans = {} - # functionalOperations don't have that set. - if not operation.net: - operation.net = self - # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: if isinstance(n, optional): diff --git a/test/test_graphkit.py b/test/test_graphkit.py index d48ed8bd..2184ee9e 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -61,8 +61,6 @@ def mul_op1(a, b): def pow_op1(a, exponent=2): return [math.pow(a, y) for y in range(1, exponent + 1)] - # `_compute()` needs a` nx-DiGraph in op's `net` attribute. - compose("mock graph")(pow_op1) assert pow_op1._compute({"sum_ab": 2}, ["sum_ab_p2"]) == {"sum_ab_p2": 4.0} # Partial operation that is bound at a later time From 6ccb0757bbaab4a2fd32924564a72e0efe2c2c5a Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 06:23:18 +0300 Subject: [PATCH 127/167] fix(op): syntax when bad parallel-method choice given --- graphkit/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphkit/base.py b/graphkit/base.py index d8e77ca2..ed4c4b25 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -204,7 +204,7 @@ def set_execution_method(self, method): choices = ["parallel", "sequential"] if method not in choices: raise ValueError( - "Invalid computation method %r! Must be one of %s"(method, choices) + "Invalid computation method %r! 
Must be one of %s" % (method, choices) ) self._execution_method = method From 3089071be78a5ae981a16e25eb08798256979657 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 06:44:37 +0300 Subject: [PATCH 128/167] doc(CHANGES): backport history from GitHub-releaes --- CHANGES.rst | 116 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 75 insertions(+), 41 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index fbfb5238..9fb59db2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,7 +12,7 @@ enhanced plotting. The only new feature actually is the :class:`sideffect`` mod Network: -------- -- FIX(:gh:`18`, :gh:`26`, :gh:`29`, :gh:`17`, :gh:`20`): Revamped DAG SOLVER ++ FIX(:gh:`18`, :gh:`26`, :gh:`29`, :gh:`17`, :gh:`20`): Revamped DAG SOLVER to fix bad pruning described in :gh:`24` & :gh:`25` Pruning now works by breaking incoming provide-links to any given @@ -26,18 +26,18 @@ Network: |v130-flowchart| -- REFACT(:gh:`21`, :gh:`29`): Refactored Network and introduced :class:`ExecutionPlan` to keep ++ REFACT(:gh:`21`, :gh:`29`): Refactored Network and introduced :class:`ExecutionPlan` to keep compilation results (the old ``steps`` list, plus input/output names). Moved also the check for when to evict a value, from running the execution-plan, to whenbuilding it; thus, execute methods don't need outputs anymore. -- ENH(:gh:`26`): "Pin* input values that may be overriten by calculated ones. ++ ENH(:gh:`26`): "Pin* input values that may be overriten by calculated ones. This required the introduction of the new :class:`PinInstruction` in the execution plan. -- FIX(:gh:`23`, :gh:`22`-2.4.3): Keep consistent order of ``networkx.DiGraph`` ++ FIX(:gh:`23`, :gh:`22`-2.4.3): Keep consistent order of ``networkx.DiGraph`` and *sets*, to generate deterministic solutions. *Unfortunately*, it non-determinism has not been fixed in < PY3.5, just @@ -45,20 +45,20 @@ Network: `_, caused by unstable dicts, and the use of subgraphs. 
-- enh: Mark outputs produced by :class:`NetworkOperation`'s needs as ``optional``. ++ enh: Mark outputs produced by :class:`NetworkOperation`'s needs as ``optional``. TODO: subgraph network-operations would not be fully functional until *"optional outpus"* are dealt with (see :gh:`22`-2.5). -- enh: Annotate operation exceptions with ``ExecutionPlan`` to aid debug sessions, ++ enh: Annotate operation exceptions with ``ExecutionPlan`` to aid debug sessions, -- drop: methods ``list_layers()``/``show layers()`` not needed, ``repr()`` is ++ drop: methods ``list_layers()``/``show layers()`` not needed, ``repr()`` is a better replacement. Plotting: --------- -- ENH(:gh:`13`, :gh:`26`, :gh:`29`): Now network remembers last plan and uses that ++ ENH(:gh:`13`, :gh:`26`, :gh:`29`): Now network remembers last plan and uses that to overlay graphs with the internals of the planing and execution: |sample-plot| @@ -68,13 +68,13 @@ Plotting: - solution values (just if they are present) - "optional" needs & broken links during pruning -- REFACT: Move all API doc on plotting in a single module, splitted in 2 phases, ++ REFACT: Move all API doc on plotting in a single module, splitted in 2 phases, build DOT & render DOT -- FIX(:gh:`13`): bring plot writing into files up-to-date from PY2; do not create plot-file ++ FIX(:gh:`13`): bring plot writing into files up-to-date from PY2; do not create plot-file if given file-extension is not supported. -- FEAT: path `pydot library `_ to support rendering ++ FEAT: path `pydot library `_ to support rendering in *Jupyter notebooks*. @@ -84,16 +84,16 @@ Testing & other code: - Increased coverage from 77% --> 90%. -- ENH(:gh:`28`): use ``pytest``, to facilitate TCs parametrization. ++ ENH(:gh:`28`): use ``pytest``, to facilitate TCs parametrization. -- ENH(:gh:`30`): Doctest all code; enabled many assertions that were just print-outs ++ ENH(:gh:`30`): Doctest all code; enabled many assertions that were just print-outs in v1.2.4. 
-- FIX: ``operation.__repr__()`` was crashing when not all arguments ++ FIX: ``operation.__repr__()`` was crashing when not all arguments had been set - a condition frequtnly met during debugging session or failed TCs (inspired by @syamajala's 309338340). -- enh: Sped up parallel/multihtread TCs by reducing delays & repetitions. ++ enh: Sped up parallel/multihtread TCs by reducing delays & repetitions. .. tip:: You need ``pytest -m slow`` to run those slow tests. @@ -103,13 +103,13 @@ Testing & other code: Chore & Docs: ------------- -- FEAT: add changelog in ``CHANGES.rst`` file, containing flowcharts ++ FEAT: add changelog in ``CHANGES.rst`` file, containing flowcharts to compare versions ``v1.2.4 <--> v1.3..0``. -- enh: updated site & documentation for all new features, comparing with v1.2.4. -- enh(:gh:`30`): added "API reference' chapter. -- drop(build): ``sphinx_rtd_theme`` library is the default theme for Sphinx now. -- enh(build): Add ``test`` *pip extras*. -- sound: https://www.youtube.com/watch?v=-527VazA4IQ, ++ enh: updated site & documentation for all new features, comparing with v1.2.4. ++ enh(:gh:`30`): added "API reference' chapter. ++ drop(build): ``sphinx_rtd_theme`` library is the default theme for Sphinx now. ++ enh(build): Add ``test`` *pip extras*. ++ sound: https://www.youtube.com/watch?v=-527VazA4IQ, https://www.youtube.com/watch?v=8J182LRi8sU&t=43s @@ -117,47 +117,81 @@ Chore & Docs: v1.2.4 (Mar 7, 2018) ==================== -- Issues in pruning algorithm: :gh:`24`, :gh:`25` -- Blocking bug in plotting code for Python-3.x. -- Test-cases without assertions (just prints). ++ Issues in pruning algorithm: :gh:`24`, :gh:`25` ++ Blocking bug in plotting code for Python-3.x. ++ Test-cases without assertions (just prints). 
|v124-flowchart| -1.2.2 (Mar 7, 2018) -=================== +1.2.2 (Mar 7, 2018, @huyng): Fixed versioning +============================================= +Versioning now is manually specified to avoid bug where the version +was not being correctly reflected on pip install deployments -1.2.1 (Feb 23, 2018) -==================== -1.2.0 (Feb 13, 2018) -==================== +1.2.1 (Feb 23, 2018, @huyng): Fixed multi-threading bug and faster compute through caching of `find_necessary_steps` +==================================================================================================================== +We've introduced a cache to avoid computing find_necessary_steps multiple times +during each inference call. -1.1.0 (Nov 9, 2017) -=================== +This has 2 benefits: ++ It reduces computation time of the compute call ++ It avoids a subtle multi-threading bug in networkx when accessing the graph + from a high number of threads. -1.0.4 (Nov 3, 2017) -=================== -1.0.3 (Jan 31, 2017) -==================== +1.2.0 (Feb 13, 2018, @huyng) +============================ +Added `set_execution_method('parallel')` for execution of graphs in parallel. 
+ + +1.1.0 (Nov 9, 2017, @huyng) +=========================== + +Update setup.py + + +1.0.4 (Nov 3, 2017, @huyng): Networkx 2.0 compatibility +======================================================= + +Minor Bug Fixes: + ++ Compatibility fix for networkx 2.0 ++ `net.times` now only stores timing info from the most recent run + + +1.0.3 (Jan 31, 2017, @huyng): Make plotting dependencies optional +================================================================= + ++ Merge pull request :gh:`6` from yahoo/plot-optional ++ make plotting dependencies optional + + +1.0.2 (Sep 29, 2016, @pumpikano): Merge pull request :gh:`5` from yahoo/remove-packaging-dep +============================================================================================ + ++ Remove 'packaging' as dependency -1.0.2 (Sep 29, 2016) -==================== 1.0.1 (Aug 24, 2016) ==================== -1.0 (Aug 2, 2016) -================= -First public release in PyPi. +1.0 (Aug 2, 2016, @robwhess) +============================ + +First public release in PyPi & GitHub. + ++ Merge pull request :gh:`3` from robwhess/travis-build ++ Travis build + .. _substitutions: From 88c68e9259e7bc99384a52f8fa04a5f4d1921b04 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 06:55:26 +0300 Subject: [PATCH 129/167] fix(doc): reorder chapters, changes at the bottom, ... 
+ homegine titles + fix: autodoc renders nothing without :special-members: --- CHANGES.rst | 6 +++--- docs/source/graph_composition.rst | 5 +++-- docs/source/index.rst | 2 +- docs/source/operations.rst | 1 + 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9fb59db2..381f3338 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,6 @@ -################## -Graphkit Changelog -################## +######### +Changelog +######### v1.3.0 (Oct 2019): New DAG solver, better plotting & "sideffect" ================================================================ diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index 2e067130..6b7d062d 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -1,7 +1,7 @@ .. _graph-composition: -Graph Composition and Use -========================= +Graph Composition +================= GraphKit's ``compose`` class handles the work of tying together ``operation`` instances into a runnable computation graph. @@ -12,6 +12,7 @@ For now, here's the specification of ``compose``. We'll get into how to use it .. autoclass:: graphkit.compose :members: __call__ + :special-members: .. _simple-graph-composition: diff --git a/docs/source/index.rst b/docs/source/index.rst index f96b03e2..aa4b234a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -28,8 +28,8 @@ Such graphs are useful in computer vision, machine learning, and many other doma operations graph_composition - changes reference + changes .. _quick-start: diff --git a/docs/source/operations.rst b/docs/source/operations.rst index 0376a7a1..cde5ca42 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -13,6 +13,7 @@ There are many ways to instantiate an ``operation``, and we'll get into more det .. 
autoclass:: graphkit.operation :members: __init__, __call__ :member-order: bysource + :special-members: Operations are just functions From 14edbf7e1a763e538fec98cc32736259ff9d682b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 08:36:08 +0300 Subject: [PATCH 130/167] ENH(plot): show SIDEFFECTS on diagrams, MORE + enh(net): mark dag-edges with sideffects; + enh(plot): plot sideffect inks as such; + enh(plot): update legend; + doc(plot): move legend text along with the code producing it. + doc: stray image fixes. --- docs/source/changes.rst | 2 +- docs/source/images/GraphkitLegend.svg | 76 ++++++++++++++++----------- docs/source/index.rst | 1 - graphkit/network.py | 18 ++++--- graphkit/plot.py | 33 +++++++----- 5 files changed, 76 insertions(+), 54 deletions(-) diff --git a/docs/source/changes.rst b/docs/source/changes.rst index 8dbd02c5..63553b51 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -7,7 +7,7 @@ :align: bottom .. |v130-flowchart| image:: images/GraphkitFlowchart-v1.3.0.svg :alt: graphkit-v1.3.0 flowchart - :width: 75% + :scale: 75% .. 
|v124-flowchart| image:: images/GraphkitFlowchart-v1.2.4.svg :alt: graphkit-v1.2.4 flowchart :scale: 75% diff --git a/docs/source/images/GraphkitLegend.svg b/docs/source/images/GraphkitLegend.svg index 6b5f3f93..11887496 100644 --- a/docs/source/images/GraphkitLegend.svg +++ b/docs/source/images/GraphkitLegend.svg @@ -4,15 +4,15 @@ - + G - + cluster_legend - -Graphkit Legend + +Graphkit Legend @@ -37,8 +37,8 @@ executed - -executed + +executed @@ -64,36 +64,36 @@ inp_out - -inp+out + +inp+out evicted - -evicted + +evicted pinned - -pinned + +pinned evpin - -evict+pin + +evict+pin sol - -in solution + +in solution @@ -121,30 +121,42 @@ - + -e4 - -pruned dependency +e33 + +sideffect - + -e3->e4 - - +e3->e33 + + - + +e4 + +pruned dependency + + + +e33->e4 + + + + + e5 - -execution sequence + +execution sequence - + e4->e5 - - -1 + + +1 diff --git a/docs/source/index.rst b/docs/source/index.rst index aa4b234a..b57ce9b1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -119,7 +119,6 @@ Code licensed under the Apache License, Version 2.0 license. See LICENSE file fo .. |travis-status| image:: https://travis-ci.org/yahoo/graphkit.svg :alt: Travis continuous integration testing ok? (Linux) - :scale: 100% :target: https://travis-ci.org/yahoo/graphkit/builds .. |cover-status| image:: https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg diff --git a/graphkit/network.py b/graphkit/network.py index 2493a85f..f2372951 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -74,16 +74,15 @@ import networkx as nx from boltons.setutils import IndexedSet as iset +from networkx import DiGraph from . import plot from .base import Operation -from .modifiers import optional +from .modifiers import optional, sideffect log = logging.getLogger(__name__) -from networkx import DiGraph - if sys.version_info < (3, 6): """ Consistently ordered variant of :class:`~networkx.DiGraph`. 
@@ -186,14 +185,19 @@ def add_op(self, operation): # add nodes and edges to graph describing the data needs for this layer for n in operation.needs: + kw = {} if isinstance(n, optional): - self.graph.add_edge(DataPlaceholderNode(n), operation, optional=True) - else: - self.graph.add_edge(DataPlaceholderNode(n), operation) + kw["optional"] = True + if isinstance(n, sideffect): + kw["sideffect"] = True + self.graph.add_edge(DataPlaceholderNode(n), operation, **kw) # add nodes and edges to graph describing what this layer provides for p in operation.provides: - self.graph.add_edge(operation, DataPlaceholderNode(p)) + kw = {} + if isinstance(n, sideffect): + kw["sideffect"] = True + self.graph.add_edge(operation, DataPlaceholderNode(p), **kw) def _build_execution_steps(self, dag, inputs, outputs): """ diff --git a/graphkit/plot.py b/graphkit/plot.py index b41ea99f..1b5daa0e 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -14,7 +14,7 @@ class Plotter(object): Classes wishing to plot their graphs should inherit this and ... implement property ``_plot`` to return a "partial" callable that somehow - ends up calling :func:`plot.plot_graph()` with the `graph` or any other + ends up calling :func:`plot.render_pydot()` with the `graph` or any other args binded appropriately. The purpose is to avoid copying this function & documentation here around. """ @@ -55,16 +55,12 @@ def plot(self, filename=None, show=False, **kws): Note that the `graph` argument is absent - Each Plotter provides - its own graph internally; use directly :func:`plot_graph()` to provide + its own graph internally; use directly :func:`render_pydot()` to provide a different graph. - **Legend:** - - . figure:: ../images/Graphkitlegend.svg + .. 
image:: images/GraphkitLegend.svg :alt: Graphkit Legend - see :func:`legend()` - *NODES:* oval @@ -95,11 +91,16 @@ def plot(self, filename=None, show=False, **kws): sources-operations *provides* target-data) dashed black arrows optional needs + blue arrows + sideffect needs/provides wheat arrows broken dependency (``provide``) during pruning green-dotted arrows execution steps labeled in succession + + To generate the **legend**, see :func:`legend()`. + **Sample code:** >>> from graphkit import compose, operation @@ -270,15 +271,18 @@ def get_node_name(a): append_any_clusters(dot) # draw edges - for src, dst in graph.edges: + for src, dst, data in graph.edges(data=True): src_name = get_node_name(src) dst_name = get_node_name(dst) + kw = {} - if isinstance(dst, Operation) and _is_class_value_in_list( - dst.needs, optional, src - ): + if data.get("optional"): kw["style"] = "dashed" - edge = pydot.Edge(src=src_name, dst=dst_name, **kw) + if data.get("sideffect"): + kw["color"] = "blue" + + # `splines=ortho` not working :-() + edge = pydot.Edge(src=src_name, dst=dst_name, splines="ortho", **kw) _apply_user_props(edge, edge_props, key=(src, dst)) @@ -305,6 +309,7 @@ def get_node_name(a): fontsize=18, penwidth=steps_thickness, arrowhead="vee", + splines=True, ) dot.add_edge(edge) @@ -402,8 +407,10 @@ def legend(filename=None, show=None): e1 -> e2; e3 [color=invis label="optional"]; e2 -> e3 [style=dashed]; + e33 [color=invis label="sideffect"]; + e3 -> e33 [color=blue]; e4 [color=invis penwidth=3 label="pruned dependency"]; - e3 -> e4 [color=wheat penwidth=2]; + e33 -> e4 [color=wheat penwidth=2]; e5 [color=invis penwidth=4 label="execution sequence"]; e4 -> e5 [color="#009999" penwidth=4 style=dotted arrowhead=vee label=1 fontcolor="#009999"]; } From a38826ede15c3f589fa871e21b0e9283791315dd Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 10:33:37 +0300 Subject: [PATCH 131/167] FIX(PyPi): +TC to check if REAME valid markdown, ... 
+ fix glitches in README. --- README.md | 14 ++++++-------- test/test_doc.py | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) create mode 100644 test/test_doc.py diff --git a/README.md b/README.md index b1db0c46..d8cf6169 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Here's how to install: pip install graphkit OR with dependencies for plotting support (and you need to install [`Graphviz`](https://graphviz.org) -program separately with your OS tools):: +program separately with your OS tools): pip install graphkit[plot] @@ -56,20 +56,19 @@ multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): >>> print(out) {'a_minus_ab': -8} - As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! ## Plotting For debugging the above graph-operation you may plot the *execution plan* -of the last computation it using these methods:: +of the last computation it using these methods: ```python - graphop.plot(show=True) # open a matplotlib window - graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... - graphop.plot() # without arguments return a pydot.DOT object - graphop.plot(solution=out) # annotate graph with solution values +graphop.plot(show=True) # open a matplotlib window +graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... +graphop.plot() # without arguments return a pydot.DOT object +graphop.plot(solution=out) # annotate graph with solution values ``` ![Intro graph](docs/source/images/intro.svg "Intro graph") @@ -80,4 +79,3 @@ of the last computation it using these methods:: # License Code licensed under the Apache License, Version 2.0 license. See LICENSE file for terms. - diff --git a/test/test_doc.py b/test/test_doc.py new file mode 100644 index 00000000..f82a2b16 --- /dev/null +++ b/test/test_doc.py @@ -0,0 +1,23 @@ +# Copyright 2016, Yahoo Inc. +# Licensed under the terms of the Apache License, Version 2.0. 
See the LICENSE file associated with the project for terms. +import os.path as osp +import subprocess +import sys + +def test_README_as_PyPi_landing_page(monkeypatch): + from docutils import core as dcore + + proj_path = osp.join(osp.dirname(__file__), "..") + long_desc = subprocess.check_output( + "python setup.py --long-description".split(), cwd=proj_path + ) + assert long_desc + + monkeypatch.setattr(sys, "exit", lambda *args: None) + dcore.publish_string( + long_desc, + enable_exit_status=False, + settings_overrides={ # see `docutils.frontend` for more. + "halt_level": 2 # 2=WARN, 1=INFO + }, + ) From 01178c1c19da56530bfa2c03b6e6542666dd8b00 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 10:39:01 +0300 Subject: [PATCH 132/167] feat(build): add build.sh script because ... if built without cleaing, stray artifacts might make it into wheel. --- bin/build.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100755 bin/build.sh diff --git a/bin/build.sh b/bin/build.sh new file mode 100755 index 00000000..d3aea133 --- /dev/null +++ b/bin/build.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# clean, or invalid files in packages +rm -vrf ./build/* ./dist/* ./*.pyc ./*.tgz ./*.egg-info +python setup.py sdist bdist_wheel + From 5195714d844fa2d188fed47b148f4dbd5d0624f1 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 10:39:22 +0300 Subject: [PATCH 133/167] DOC: improved Opening of the Project --- README.md | 23 ++++++++++++++++------- docs/source/index.rst | 9 +++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d8cf6169..a23274fa 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,16 @@ # GraphKit -[![Latest version in PyPI](https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version)](https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version) [![Latest version in 
GitHub](https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases)](https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases) [![Supported Python versions of latest release in PyPi](https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python)](https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python) [![Build Status](https://travis-ci.org/yahoo/graphkit.svg?branch=master)](https://travis-ci.org/yahoo/graphkit) [![codecov](https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg)](https://codecov.io/gh/yahoo/graphkit) [![License](https://img.shields.io/pypi/l/graphkit.svg)](https://img.shields.io/pypi/l/graphkit.svg) - -[![Github watchers](https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social) [![Github stargazers](https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social) [![Github forks](https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social) [![Issues count](http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social)](http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social) +[![Latest version in PyPI](https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version)](https://img.shields.io/pypi/v/graphkit.svg?label=PyPi%20version) +[![Latest version in GitHub](https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases)](https://img.shields.io/github/v/release/yahoo/graphkit.svg?label=GitHub%20release&include_prereleases) +[![Supported Python versions of latest release in PyPi](https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python)](https://img.shields.io/pypi/pyversions/graphkit.svg?label=Python) +[![Build 
Status](https://travis-ci.org/yahoo/graphkit.svg?branch=master)](https://travis-ci.org/yahoo/graphkit) +[![codecov](https://codecov.io/gh/yahoo/graphkit/branch/master/graph/badge.svg)](https://codecov.io/gh/yahoo/graphkit) +[![License](https://img.shields.io/pypi/l/graphkit.svg)](https://img.shields.io/pypi/l/graphkit.svg) + +[![Github watchers](https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/watchers/yahoo/graphkit.svg?style=social) +[![Github stargazers](https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/stars/yahoo/graphkit.svg?style=social) +[![Github forks](https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social)](https://img.shields.io/github/forks/yahoo/graphkit.svg?style=social) +[![Issues count](http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social)](http://img.shields.io/github/issues/yahoo/graphkit.svg?style=social) [Full Documentation](https://pythonhosted.org/graphkit/) @@ -12,10 +20,11 @@ ## Lightweight computation graphs for Python -GraphKit is a lightweight Python module for creating and running ordered graphs of computations, -where the nodes of the graph correspond to computational operations, and the edges -correspond to output --> input dependencies between those operations. -Such graphs are useful in computer vision, machine learning, and many other domains. +GraphKit is an an understandable and lightweight Python module for building and running +ordered graphs of computations. +The API posits a fair compromise between features and complexity without precluding any. +It might be of use in computer vision, machine learning and other data science domains, +or become the core of a custom ETL pipelne. 
## Quick start diff --git a/docs/source/index.rst b/docs/source/index.rst index b57ce9b1..256e2fe1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -18,10 +18,11 @@ GraphKit Lightweight computation graphs for Python ----------------------------------------- -GraphKit is a lightweight Python module for creating and running ordered graphs of computations, -where the nodes of the graph correspond to computational operations, and the edges -correspond to output --> input dependencies between those operations. -Such graphs are useful in computer vision, machine learning, and many other domains. +GraphKit is an an understandable and lightweight Python module for building and running +ordered graphs of computations. +The API posits a fair compromise between features and complexity without precluding any. +It might be of use in computer vision, machine learning and other data science domains, +or become the core of a custom ETL pipelne. .. toctree:: :maxdepth: 2 From 4d1d979882c39957885c31d840b3ad0d50518437 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 11:16:23 +0300 Subject: [PATCH 134/167] fix(pytest): list test items or --ignore=setup.py not working.... 
and when giving `pytest --lf` i get: _____________________________________ ERROR collecting setup.py _____________________________________ /usr/lib/python3.7/distutils/fancy_getopt.py:233: in getopt opts, args = getopt.getopt(args, short_opts, self.long_opts) /usr/lib/python3.7/getopt.py:93: in getopt opts, args = do_longs(opts, args[0][2:], longopts, args[1:]) /usr/lib/python3.7/getopt.py:157: in do_longs has_arg, opt = long_has_args(opt, longopts) /usr/lib/python3.7/getopt.py:174: in long_has_args raise GetoptError(_('option --%s not recognized') % opt, opt) E getopt.GetoptError: option --lf not recognized --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index c9641347..aa85f913 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,8 +10,8 @@ markers = slow: marks tests as slow, select them with `-m slow` or `-m 'not slow'` # TODO: enable doctests in README.md. -addopts = --ignore setup.py - # faciltate developer +addopts = graphkit test/ docs/source/ README.md + # Faciltate developer, rum'em all with -m 'slow or not slow'. 
-m 'not slow' --doctest-report ndiff --doctest-continue-on-failure From 313b2416477f1bc00a6f2b65bade526346e45ddf Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 14:23:43 +0300 Subject: [PATCH 135/167] fic(doc): CODE in README was broken unbackquoted --- README.md | 3 +++ docs/source/graph_composition.rst | 2 +- graphkit/base.py | 22 +++++++++++++----- graphkit/functional.py | 37 +++++++++++++++++++------------ graphkit/network.py | 3 ++- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index a23274fa..f814268a 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ program separately with your OS tools): Here's a Python script with an example GraphKit computation graph that produces multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): +```python >>> from operator import mul, sub >>> from graphkit import compose, operation @@ -65,6 +66,8 @@ multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): >>> print(out) {'a_minus_ab': -8} +``` + As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index 6b7d062d..33614ef5 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -153,7 +153,7 @@ as a debug aid: ... operation(name="screamer", needs=['a'], provides=["foo"])(scream) ... )({'a': None}) ... except ValueError as ex: -... print(ex.execution_node) +... print(ex.operation) ... 
print(ex.execution_plan) FunctionalOperation(name='screamer', needs=['a'], provides=['foo']) ExecutionPlan(inputs=('a',), outputs=(), steps: diff --git a/graphkit/base.py b/graphkit/base.py index ed4c4b25..dce5a9da 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -102,12 +102,22 @@ def _compute(self, named_inputs, outputs=None): inputs = [named_inputs[d] for d in self.needs] results = self.compute(inputs) - results = zip(self.provides, results) - if outputs: - outputs = set(outputs) - results = filter(lambda x: x[0] in outputs, results) - - return dict(results) + try: + results = zip(self.provides, results) + + if outputs: + outputs = set(outputs) + results = filter(lambda x: x[0] in outputs, results) + + return dict(results) + except Exception as ex: + ## Annotate exception with debugging aid on error + # + ex.operation = self + ex.operation_inputs = inputs + ex.operation_asked = outputs + ex.operation_results = locals().get('results') + raise def _after_init(self): """ diff --git a/graphkit/functional.py b/graphkit/functional.py index 9506a3bd..f3478b63 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -31,23 +31,32 @@ def _compute(self, named_inputs, outputs=None): # Combine params and optionals into one big glob of keyword arguments. kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} - result = self.fn(*inputs, **kwargs) - # Don't expect sideffect outputs. provides = [n for n in self.provides if not isinstance(n, sideffect)] - if not provides: - # All outputs were sideffects. - return {} - - if len(provides) == 1: - result = [result] - - result = zip(provides, result) - if outputs: - outputs = set(n for n in outputs if not isinstance(n, sideffect)) - result = filter(lambda x: x[0] in outputs, result) - return dict(result) + try: + result = self.fn(*inputs, **kwargs) + + if not provides: + # All outputs were sideffects. 
+ return {} + + if len(provides) == 1: + result = [result] + + result = zip(provides, result) + if outputs: + outputs = set(n for n in outputs if not isinstance(n, sideffect)) + result = filter(lambda x: x[0] in outputs, result) + + return dict(result) + except Exception as ex: + ex.operation = self + ex.operation_inputs = (inputs, kwargs) + ex.operation_provides = provides + ex.operation_asked = outputs + ex.operation_results = locals().get('result') + raise def __call__(self, *args, **kwargs): return self.fn(*args, **kwargs) diff --git a/graphkit/network.py b/graphkit/network.py index f2372951..4eadc3c0 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -597,7 +597,8 @@ def _call_operation(self, op, solution): try: return op._compute(solution) except Exception as ex: - ex.execution_node = op + ## Annotate exception with debugging aid on error + # ex.execution_plan = self raise From 58e51122a215db0d54c42092dab135520e6ce762 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 17:04:21 +0300 Subject: [PATCH 136/167] ENH(ops): ANNNOTATE func-ERRORS with internal data,.. as debug aid (was doing that for plans already). + enh: annotate also networks. 
--- docs/source/graph_composition.rst | 62 +++++++++++++++++++++++------ graphkit/base.py | 24 ++++++----- graphkit/functional.py | 66 +++++++++++++++++-------------- graphkit/network.py | 48 +++++++++++++--------- 4 files changed, 131 insertions(+), 69 deletions(-) diff --git a/docs/source/graph_composition.rst b/docs/source/graph_composition.rst index 33614ef5..c86bf0c0 100644 --- a/docs/source/graph_composition.rst +++ b/docs/source/graph_composition.rst @@ -139,11 +139,13 @@ As always, we can run computations with this graph by simply calling it:: -Errors ------- +Errors & debugging +------------------ -If an operation fails, its exception gets annotated with the folllowing properties -as a debug aid: +If an operation fails, the original exception gets annotated +with the folllowing properties, as a debug aid: + +>>> from pprint import pprint >>> def scream(*args): ... raise ValueError("Wrong!") @@ -153,14 +155,50 @@ as a debug aid: ... operation(name="screamer", needs=['a'], provides=["foo"])(scream) ... )({'a': None}) ... except ValueError as ex: -... print(ex.operation) -... print(ex.execution_plan) -FunctionalOperation(name='screamer', needs=['a'], provides=['foo']) -ExecutionPlan(inputs=('a',), outputs=(), steps: - +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])) - -Of course from the :class:`ExecutionPlan` you can explore its ``dag`` property -or the ``net`` that compiled it. +... pprint(ex.graphkit_aid) +{'network': + ... 
+ 'operation': FunctionalOperation(name='screamer', needs=['a'], provides=['foo']), + 'operation_args': {'args': [None], 'kwargs': {}}, + 'operation_fnouts': None, + 'operation_outs': None, + 'operation_results': None, + 'plan': ExecutionPlan(inputs=('a',), outputs=(), steps: + +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])), + 'solution': {'a': None}} + + +The following annotated attributes might have values on an exception ``ex``: + +``ex.network`` + the innermost network owning the failed operation/function + +``ex.plan`` + the innermost plan that executing when a operation crashed + +``ex.operation`` + the innermost operation that failed + +``ex.operation_args`` + either a 2-tuple ``(args, kwargs)`` or just the ``args`` fed to the operation + +``ex.operation_fnouts`` + the names of the outputs the function was expected to return + +``ex.operation_outs`` + the names eventually the graph needed from the operation + (a subset of the above) + +``ex.operation_results`` + the values dict, if any; it maybe a *zip* of the provides + with the actual returned values of the function, ot the raw results. + +.. note:: + The :ref:`plotting` capabilities, along with the above annotation of exceptions + with the internal state of plan/operation often renders a debugger session + unnecessary. But since the state of the annotated values might be incomple, + you may not always avoid one. + Execution internals ------------------- diff --git a/graphkit/base.py b/graphkit/base.py index dce5a9da..b50a64e6 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -99,24 +99,28 @@ def compute(self, inputs): raise NotImplementedError("Define callable of %r!" 
% self) def _compute(self, named_inputs, outputs=None): - inputs = [named_inputs[d] for d in self.needs] - results = self.compute(inputs) - try: + args = [named_inputs[d] for d in self.needs] + results = self.compute(args) + results = zip(self.provides, results) if outputs: - outputs = set(outputs) - results = filter(lambda x: x[0] in outputs, results) + outs = set(outputs) + results = filter(lambda x: x[0] in outs, results) return dict(results) except Exception as ex: - ## Annotate exception with debugging aid on error + ## Annotate exception with debugging aid on errors. # - ex.operation = self - ex.operation_inputs = inputs - ex.operation_asked = outputs - ex.operation_results = locals().get('results') + locs = locals() + err_aid = getattr(ex, "graphkit_aid", {}) + err_aid.setdefault("operation", self) + err_aid.setdefault("operation_args", locs.get("args")) + err_aid.setdefault("operation_fnouts", locs.get("outputs")) + err_aid.setdefault("operation_outs", locs.get("outputs")) + err_aid.setdefault("operation_results", locs.get("results")) + setattr(ex, "graphkit_aid", err_aid) raise def _after_init(self): diff --git a/graphkit/functional.py b/graphkit/functional.py index f3478b63..b5c22f02 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -1,11 +1,11 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
-from boltons.setutils import IndexedSet as iset import networkx as nx +from boltons.setutils import IndexedSet as iset -from .base import Operation, NetworkOperation -from .network import Network +from .base import NetworkOperation, Operation from .modifiers import optional, sideffect +from .network import Network class FunctionalOperation(Operation): @@ -14,28 +14,28 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): - inputs = [ - named_inputs[n] - for n in self.needs - if not isinstance(n, optional) and not isinstance(n, sideffect) - ] - - # Find any optional inputs in named_inputs. Get only the ones that - # are present there, no extra `None`s. - optionals = { - n: named_inputs[n] - for n in self.needs - if isinstance(n, optional) and n in named_inputs - } - - # Combine params and optionals into one big glob of keyword arguments. - kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} - - # Don't expect sideffect outputs. - provides = [n for n in self.provides if not isinstance(n, sideffect)] - try: - result = self.fn(*inputs, **kwargs) + args = [ + named_inputs[n] + for n in self.needs + if not isinstance(n, optional) and not isinstance(n, sideffect) + ] + + # Find any optional inputs in named_inputs. Get only the ones that + # are present there, no extra `None`s. + optionals = { + n: named_inputs[n] + for n in self.needs + if isinstance(n, optional) and n in named_inputs + } + + # Combine params and optionals into one big glob of keyword arguments. + kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} + + # Don't expect sideffect outputs. + provides = [n for n in self.provides if not isinstance(n, sideffect)] + + result = self.fn(*args, **kwargs) if not provides: # All outputs were sideffects. 
@@ -51,11 +51,19 @@ def _compute(self, named_inputs, outputs=None): return dict(result) except Exception as ex: - ex.operation = self - ex.operation_inputs = (inputs, kwargs) - ex.operation_provides = provides - ex.operation_asked = outputs - ex.operation_results = locals().get('result') + ## Annotate exception with debugging aid on errors. + # + locs = locals() + err_aid = getattr(ex, "graphkit_aid", {}) + err_aid.setdefault("operation", self) + err_aid.setdefault( + "operation_args", + {"args": locs.get("args"), "kwargs": locs.get("kwargs")}, + ) + err_aid.setdefault("operation_fnouts", locs.get("outputs")) + err_aid.setdefault("operation_outs", locs.get("outputs")) + err_aid.setdefault("operation_results", locs.get("results")) + setattr(ex, "graphkit_aid", err_aid) raise def __call__(self, *args, **kwargs): diff --git a/graphkit/network.py b/graphkit/network.py index 4eadc3c0..02abd9a7 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -455,27 +455,37 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) :returns: a dictionary of output data objects, keyed by name. """ + try: + assert ( + isinstance(outputs, (list, tuple)) or outputs is None + ), "The outputs argument must be a list" - assert ( - isinstance(outputs, (list, tuple)) or outputs is None - ), "The outputs argument must be a list" - - # Build the execution plan. - self.last_plan = plan = self.compile(named_inputs.keys(), outputs) + # Build the execution plan. + self.last_plan = plan = self.compile(named_inputs.keys(), outputs) - # start with fresh data solution. - solution = dict(named_inputs) + # start with fresh data solution. + solution = dict(named_inputs) - plan.execute(solution, overwrites_collector, method) + plan.execute(solution, overwrites_collector, method) - if outputs: - # Filter outputs to just return what's requested. - # Otherwise, eturn the whole solution as output, - # including input and intermediate data nodes. 
- # TODO: assert no other outputs exists due to DelInstructs. - solution = dict(i for i in solution.items() if i[0] in outputs) + if outputs: + # Filter outputs to just return what's requested. + # Otherwise, eturn the whole solution as output, + # including input and intermediate data nodes. + # TODO: assert no other outputs exists due to DelInstructs. + solution = dict(i for i in solution.items() if i[0] in outputs) - return solution + return solution + except Exception as ex: + ## Annotate exception with debugging aid on errorrs. + # + locs = locals() + err_aid = getattr(ex, "graphkit_aid", {}) + err_aid.setdefault("network", locs.get("self")) + err_aid.setdefault("plan", locs.get("plan")) + err_aid.setdefault("solution", locs.get("solution")) + setattr(ex, "graphkit_aid", err_aid) + raise class ExecutionPlan( @@ -597,9 +607,11 @@ def _call_operation(self, op, solution): try: return op._compute(solution) except Exception as ex: - ## Annotate exception with debugging aid on error + ## Annotate exception with debugging aid on errors. 
# - ex.execution_plan = self + err_aid = getattr(ex, "graphkit_aid", {}) + err_aid.setdefault("plan", self) + setattr(ex, "graphkit_aid", err_aid) raise def _execute_thread_pool_barrier_method( From 536d31bd371c07e9f52505632475ad4d9cd8addd Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 17:15:42 +0300 Subject: [PATCH 137/167] FIX(doc): README were not PyPi-validating --- README.md | 54 +++++++++++++++++++++++------------------------- test/test_doc.py | 1 + 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index f814268a..a04d58db 100644 --- a/README.md +++ b/README.md @@ -40,34 +40,32 @@ program separately with your OS tools): Here's a Python script with an example GraphKit computation graph that produces multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): -```python ->>> from operator import mul, sub ->>> from graphkit import compose, operation - ->>> # Computes |a|^p. ->>> def abspow(a, p): -... c = abs(a) ** p -... return c - ->>> # Compose the mul, sub, and abspow operations into a computation graph. ->>> graphop = compose(name="graphop")( -... operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), -... operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), -... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) -... ) - ->>> # Run the graph and request all of the outputs. ->>> out = graphop({'a': 2, 'b': 5}) ->>> print(out) -{'a': 2, 'b': 5, 'ab': 10, 'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512} - ->>> # Run the graph and request a subset of the outputs. ->>> out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) ->>> print(out) -{'a_minus_ab': -8} - -``` - + >>> from operator import mul, sub + >>> from graphkit import compose, operation + + >>> # Computes |a|^p. + >>> def abspow(a, p): + ... c = abs(a) ** p + ... 
return c + + >>> # Compose the mul, sub, and abspow operations into a computation graph. + >>> graphop = compose(name="graphop")( + ... operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + ... operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), + ... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) + ... ) + + >>> # Run the graph and request all of the outputs. + >>> out = graphop({'a': 2, 'b': 5}) + >>> print(out) + {'a': 2, 'b': 5, 'ab': 10, 'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512} + + >>> # Run the graph and request a subset of the outputs. + >>> out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) + >>> print(out) + {'a_minus_ab': -8} + + As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! diff --git a/test/test_doc.py b/test/test_doc.py index f82a2b16..f54cf76f 100644 --- a/test/test_doc.py +++ b/test/test_doc.py @@ -4,6 +4,7 @@ import subprocess import sys + def test_README_as_PyPi_landing_page(monkeypatch): from docutils import core as dcore From 0508558a51aec99c4350d83736a2dcae2839e4b4 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 17:27:36 +0300 Subject: [PATCH 138/167] doc(plot): explain how to reset plot to bare-bone --- docs/source/index.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 256e2fe1..2dd15f54 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -101,9 +101,13 @@ of the last computation it using these methods:: The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. -.. Tip:: - The `pydot.Dot `_ instances returned by ``plot()`` - are rendered directly in *Jupyter/IPython* notebooks as SVG images. 
+Note that if you plot a compose a graph operation it comes out bare bone, +with just the 2 types of nodes (data & operations) & execution-plan sequence. +But as soon as you run it, the net plot calls will print more of the internals. +These are based on the ``graph_op.net.last_plan`` attribute which *caches* +the last run to inspect it. If you want the bare-bone diagram, simply reset it:: + + netop.net.last_plan = None .. NOTE:: For plots, `Graphviz `_ program must be in your PATH, @@ -112,6 +116,10 @@ of the last computation it using these methods:: pip install graphkit[plot] +.. Tip:: + The `pydot.Dot `_ instances returned by ``plot()`` + are rendered directly in *Jupyter/IPython* notebooks as SVG images. + License ------- From c25082e8b55eeb906f474028d2779d1779801b93 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 17:43:45 +0300 Subject: [PATCH 139/167] DOC(plot): MOVE plot + debug sections to own CHAPTER --- docs/source/index.rst | 44 +------------- docs/source/operations.rst | 3 + docs/source/plotting.rst | 120 +++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 41 deletions(-) create mode 100644 docs/source/plotting.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 2dd15f54..dc503b23 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -29,6 +29,7 @@ or become the core of a custom ETL pipelne. operations graph_composition + plotting reference changes @@ -77,49 +78,10 @@ Here's a Python script with an example GraphKit computation graph that produces # Prints "{'a_minus_ab': -8}". print(out) -As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! +As you can see, any function can be used as an operation in GraphKit, +even ones imported from system modules! -.. 
_plotting: - -Plotting --------- - -For debugging the above graph-operation you may plot the *execution plan* -of the last computation it using these methods:: - - graphop.plot(show=True) # open a matplotlib window - graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... - graphop.plot() # without arguments return a pydot.DOT object - graphop.plot(solution=out) # annotate graph with solution values - -.. image:: images/intro.svg - :alt: Intro graph - -.. figure:: images/GraphkitLegend.svg - :alt: Graphkit Legend - - The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. - -Note that if you plot a compose a graph operation it comes out bare bone, -with just the 2 types of nodes (data & operations) & execution-plan sequence. -But as soon as you run it, the net plot calls will print more of the internals. -These are based on the ``graph_op.net.last_plan`` attribute which *caches* -the last run to inspect it. If you want the bare-bone diagram, simply reset it:: - - netop.net.last_plan = None - -.. NOTE:: - For plots, `Graphviz `_ program must be in your PATH, - and ``pydot`` & ``matplotlib`` python packages installed. - You may install both when installing ``graphkit`` with its ``plot`` extras:: - - pip install graphkit[plot] - -.. Tip:: - The `pydot.Dot `_ instances returned by ``plot()`` - are rendered directly in *Jupyter/IPython* notebooks as SVG images. - License ------- diff --git a/docs/source/operations.rst b/docs/source/operations.rst index cde5ca42..55a84ebd 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -62,6 +62,9 @@ The ``needs`` and ``provides`` arguments to the operations in this script define .. image:: images/intro.svg +.. Tip:: + See :ref:`plotting` on how to make diagrams like this. 
+ Constant operation parameters: ``params`` ----------------------------------------- diff --git a/docs/source/plotting.rst b/docs/source/plotting.rst new file mode 100644 index 00000000..ebb4e90a --- /dev/null +++ b/docs/source/plotting.rst @@ -0,0 +1,120 @@ +###################### +Plotting and Debugging +###################### + +.. _plotting: + +Plotting +-------- + +For :ref:`debugging` it is necessary to visualize the graph-operation. +You may plot the original plot and annotate on top the *execution plan* and +solution of the last computation, calling methods with arguments like this:: + + graphop.plot(show=True) # open a matplotlib window + graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... + graphop.plot() # without arguments return a pydot.DOT object + graphop.plot(solution=out) # annotate graph with solution values + +.. image:: images/intro.svg + :alt: Intro graph + +.. figure:: images/GraphkitLegend.svg + :alt: Graphkit Legend + + The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. + +The same ``plot()`` methods are defined on a :class:`NetworkOperation`, +:class:`Network` & :class:`ExecutionPlan`, each one capable to produce diagrams +with increasing complexity. Whenever possible, the top-level ``plot()`` methods +delegates to the ones below. + +For instance, when a net-operation has just been composed, plotting it will +come out bare bone, with just the 2 types of nodes (data & operations), their +dependencies, and the sequence of the execution-plan. + +But as soon as you run it, the net plot calls will print more of the internals. +These are based on the ``graph_op.net.last_plan`` attribute which *caches* +the last run to inspect it. If you want the bare-bone diagram, simply reset it:: + + netop.net.last_plan = None + +.. Note:: + For plots, `Graphviz `_ program must be in your PATH, + and ``pydot`` & ``matplotlib`` python packages installed. 
+ You may install both when installing ``graphkit`` with its ``plot`` extras:: + + pip install graphkit[plot] + +.. Tip:: + The `pydot.Dot `_ instances returned by ``plot()`` + are rendered directly in *Jupyter/IPython* notebooks as SVG images. + + +.. _debugging: + +Errors & debugging +------------------ + +If an operation fails, the original exception gets annotated +with the folllowing properties, as a debug aid: + +>>> from pprint import pprint + +>>> def scream(*args): +... raise ValueError("Wrong!") + +>>> try: +... compose("errgraph")( +... operation(name="screamer", needs=['a'], provides=["foo"])(scream) +... )({'a': None}) +... except ValueError as ex: +... pprint(ex.graphkit_aid) +{'network': + ... + 'operation': FunctionalOperation(name='screamer', needs=['a'], provides=['foo']), + 'operation_args': {'args': [None], 'kwargs': {}}, + 'operation_fnouts': None, + 'operation_outs': None, + 'operation_results': None, + 'plan': ExecutionPlan(inputs=('a',), outputs=(), steps: + +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])), + 'solution': {'a': None}} + + +The following annotated attributes might have values on an exception ``ex``: + +``ex.network`` + the innermost network owning the failed operation/function + +``ex.plan`` + the innermost plan that executing when a operation crashed + +``ex.operation`` + the innermost operation that failed + +``ex.operation_args`` + either a 2-tuple ``(args, kwargs)`` or just the ``args`` fed to the operation + +``ex.operation_fnouts`` + the names of the outputs the function was expected to return + +``ex.operation_outs`` + the names eventually the graph needed from the operation + (a subset of the above) + +``ex.operation_results`` + the values dict, if any; it maybe a *zip* of the provides + with the actual returned values of the function, ot the raw results. + +.. 
note:: + The :ref:`plotting` capabilities, along with the above annotation of exceptions + with the internal state of plan/operation often renders a debugger session + unnecessary. But since the state of the annotated values might be incomple, + you may not always avoid one. + + +Execution internals +------------------- +.. automodule:: graphkit.network + :noindex: From 863c7eb6dc2884e8a6c20d1ee4040008d17ac80e Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 17:47:42 +0300 Subject: [PATCH 140/167] refact(doc): simplify composition.rst name --- docs/source/{graph_composition.rst => composition.rst} | 0 docs/source/index.rst | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/source/{graph_composition.rst => composition.rst} (100%) diff --git a/docs/source/graph_composition.rst b/docs/source/composition.rst similarity index 100% rename from docs/source/graph_composition.rst rename to docs/source/composition.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index dc503b23..34fa7f2a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -28,7 +28,7 @@ or become the core of a custom ETL pipelne. 
:maxdepth: 2 operations - graph_composition + composition plotting reference changes From a5cd5a7b225c9ca3bd318027905aa7358ecfd883 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 19:13:08 +0300 Subject: [PATCH 141/167] DOC(PLOT): put BAREBONE in the top --- README.md | 17 +- docs/source/composition.rst | 2 +- docs/source/images/barebone_2ops.svg | 86 +++++++++++ docs/source/images/barebone_3ops.svg | 110 +++++++++++++ docs/source/images/bigger_example_graph.svg | 4 - docs/source/images/executed_3ops.svg | 145 ++++++++++++++++++ .../source/images/{intro.svg => plotting.svg} | 0 docs/source/index.rst | 2 +- docs/source/operations.rst | 2 +- docs/source/plotting.rst | 27 ++-- 10 files changed, 371 insertions(+), 24 deletions(-) create mode 100644 docs/source/images/barebone_2ops.svg create mode 100644 docs/source/images/barebone_3ops.svg delete mode 100644 docs/source/images/bigger_example_graph.svg create mode 100644 docs/source/images/executed_3ops.svg rename docs/source/images/{intro.svg => plotting.svg} (100%) diff --git a/README.md b/README.md index a04d58db..eeaa14c1 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,8 @@ > It's a DAG all the way down -![Sample graph](docs/source/images/test_pruning_not_overrides_given_intermediate-asked.png "Sample graph") +simple graphkit computation ## Lightweight computation graphs for Python @@ -55,6 +56,9 @@ multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): ... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) ... ) + + >>> # Run the graph and request all of the outputs. >>> out = graphop({'a': 2, 'b': 5}) >>> print(out) @@ -65,23 +69,24 @@ multiple outputs (`a * b`, `a - a * b`, and `abs(a - a * b) ** 3`): >>> print(out) {'a_minus_ab': -8} - +simple graphkit computation + As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! 
## Plotting -For debugging the above graph-operation you may plot the *execution plan* -of the last computation it using these methods: +For debugging the above graph-operation you may plot either the newly omposed graph or the *execution plan* of the last computation executed, +using these methods: ```python graphop.plot(show=True) # open a matplotlib window -graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... +graphop.plot("graphop.svg") # other supported formats: png, jpg, pdf, ... graphop.plot() # without arguments return a pydot.DOT object graphop.plot(solution=out) # annotate graph with solution values ``` -![Intro graph](docs/source/images/intro.svg "Intro graph") ![Graphkit Legend](docs/source/images/GraphkitLegend.svg "Graphkit Legend") > **TIP:** The `pydot.Dot` instances returned by `plot()` are rendered as SVG in *Jupyter/IPython*. diff --git a/docs/source/composition.rst b/docs/source/composition.rst index c86bf0c0..c770f9d3 100644 --- a/docs/source/composition.rst +++ b/docs/source/composition.rst @@ -39,7 +39,7 @@ The simplest use case for ``compose`` is assembling a collection of individual o The call here to ``compose()`` yields a runnable computation graph that looks like this (where the circles are operations, squares are data, and octagons are parameters): -.. image:: images/intro.svg +.. image:: images/barebone_3ops.svg .. 
_graph-computations: diff --git a/docs/source/images/barebone_2ops.svg b/docs/source/images/barebone_2ops.svg new file mode 100644 index 00000000..fffad243 --- /dev/null +++ b/docs/source/images/barebone_2ops.svg @@ -0,0 +1,86 @@ + + + + + + +G + +pipeline + + +a + +a + + + +mul1 + +mul1 + + + +a->mul1 + + + + + +sub1 + +sub1 + + + +a->sub1 + + + + + +ab + +ab + + + +mul1->ab + + + + + +b + +b + + + +b->mul1 + + + + + +ab->sub1 + + + + + +a_minus_ab + +a_minus_ab + + + +sub1->a_minus_ab + + + + + diff --git a/docs/source/images/barebone_3ops.svg b/docs/source/images/barebone_3ops.svg new file mode 100644 index 00000000..6cd55957 --- /dev/null +++ b/docs/source/images/barebone_3ops.svg @@ -0,0 +1,110 @@ + + + + + + +G + +graphop + + +a + +a + + + +mul1 + +mul1 + + + +a->mul1 + + + + + +sub1 + +sub1 + + + +a->sub1 + + + + + +ab + +ab + + + +mul1->ab + + + + + +b + +b + + + +b->mul1 + + + + + +ab->sub1 + + + + + +a_minus_ab + +a_minus_ab + + + +sub1->a_minus_ab + + + + + +abspow1 + +abspow1 + + + +a_minus_ab->abspow1 + + + + + +abs_a_minus_ab_cubed + +abs_a_minus_ab_cubed + + + +abspow1->abs_a_minus_ab_cubed + + + + + diff --git a/docs/source/images/bigger_example_graph.svg b/docs/source/images/bigger_example_graph.svg deleted file mode 100644 index c631b2a6..00000000 --- a/docs/source/images/bigger_example_graph.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/docs/source/images/executed_3ops.svg b/docs/source/images/executed_3ops.svg new file mode 100644 index 00000000..d8ffa4e3 --- /dev/null +++ b/docs/source/images/executed_3ops.svg @@ -0,0 +1,145 @@ + + + + + + +G + +graphop + + +a + +a + + + +mul1 + +mul1 + + + +a->mul1 + + + + + +ab + +ab + + + +a->ab + + +4 + + + +sub1 + +sub1 + + + +a->sub1 + + + + + +b + +b + + + +mul1->b + + +1 + + + +mul1->ab + + + + + +b->mul1 + + + + + +b->sub1 + + +2 + + + +ab->sub1 + + + + + +abspow1 + +abspow1 + + + +ab->abspow1 + + +5 + + + +sub1->a + + +3 + + + +a_minus_ab + +a_minus_ab + + + +sub1->a_minus_ab + + + + + 
+a_minus_ab->abspow1 + + + + + +abs_a_minus_ab_cubed + +abs_a_minus_ab_cubed + + + +abspow1->abs_a_minus_ab_cubed + + + + + diff --git a/docs/source/images/intro.svg b/docs/source/images/plotting.svg similarity index 100% rename from docs/source/images/intro.svg rename to docs/source/images/plotting.svg diff --git a/docs/source/index.rst b/docs/source/index.rst index 34fa7f2a..c7fa98a0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -139,7 +139,7 @@ Code licensed under the Apache License, Version 2.0 license. See LICENSE file fo :target: https://www.apache.org/licenses/LICENSE-2.0 :alt: Apache License, version 2.0 -.. |sample-plot| image:: images/sample_plot.svg +.. |sample-plot| image:: images/barebone_2ops.svg :alt: sample graphkit plot :width: 120px :align: middle diff --git a/docs/source/operations.rst b/docs/source/operations.rst index 55a84ebd..f3152134 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -60,7 +60,7 @@ Let's look again at the operations from the script in :ref:`quick-start`, for ex The ``needs`` and ``provides`` arguments to the operations in this script define a computation graph that looks like this (where the oval are operations, squares/houses are data): -.. image:: images/intro.svg +.. image:: images/barebone_3ops.svg .. Tip:: See :ref:`plotting` on how to make diagrams like this. diff --git a/docs/source/plotting.rst b/docs/source/plotting.rst index ebb4e90a..77530bc9 100644 --- a/docs/source/plotting.rst +++ b/docs/source/plotting.rst @@ -12,16 +12,17 @@ You may plot the original plot and annotate on top the *execution plan* and solution of the last computation, calling methods with arguments like this:: graphop.plot(show=True) # open a matplotlib window - graphop.plot("intro.svg") # other supported formats: png, jpg, pdf, ... + graphop.plot("graphop.svg") # other supported formats: png, jpg, pdf, ... 
graphop.plot() # without arguments return a pydot.DOT object graphop.plot(solution=out) # annotate graph with solution values -.. image:: images/intro.svg - :alt: Intro graph +.. image:: images/executed_3ops.svg + :alt: execution plan .. figure:: images/GraphkitLegend.svg :alt: Graphkit Legend - + :width: 100% + The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. The same ``plot()`` methods are defined on a :class:`NetworkOperation`, @@ -33,6 +34,9 @@ For instance, when a net-operation has just been composed, plotting it will come out bare bone, with just the 2 types of nodes (data & operations), their dependencies, and the sequence of the execution-plan. +.. image:: images/barebone_3ops.svg + :alt: barebone graph + But as soon as you run it, the net plot calls will print more of the internals. These are based on the ``graph_op.net.last_plan`` attribute which *caches* the last run to inspect it. If you want the bare-bone diagram, simply reset it:: @@ -59,6 +63,7 @@ Errors & debugging If an operation fails, the original exception gets annotated with the folllowing properties, as a debug aid: +>>> from graphkit import compose, operation >>> from pprint import pprint >>> def scream(*args): @@ -84,26 +89,26 @@ with the folllowing properties, as a debug aid: The following annotated attributes might have values on an exception ``ex``: -``ex.network`` +``network`` the innermost network owning the failed operation/function -``ex.plan`` +``plan`` the innermost plan that executing when a operation crashed -``ex.operation`` +``operation`` the innermost operation that failed -``ex.operation_args`` +``operation_args`` either a 2-tuple ``(args, kwargs)`` or just the ``args`` fed to the operation -``ex.operation_fnouts`` +``operation_fnouts`` the names of the outputs the function was expected to return -``ex.operation_outs`` +``operation_outs`` the names eventually the graph needed from the operation (a subset of the above) -``ex.operation_results`` 
+``operation_results`` the values dict, if any; it maybe a *zip* of the provides with the actual returned values of the function, ot the raw results. From 3583cbb0a7da851a56e783a12432e18c596ec0cd Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 22:47:36 +0300 Subject: [PATCH 142/167] refact(net): REORDER SOURCES --- graphkit/network.py | 642 ++++++++++++++++++++++---------------------- 1 file changed, 323 insertions(+), 319 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 02abd9a7..39963685 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -7,7 +7,7 @@ COMPILE: prune unsatisfied nodes, sort dag topologically & solve it, and - derive the *execution steps* (see below) based on the given *inputs* + derive the *execution steps* (see below) based on the given *inputs* and asked *outputs*. EXECUTE: @@ -136,6 +136,260 @@ def __repr__(self): return 'PinInstruction("%s")' % self +# TODO: maybe class Solution(object): +# values = {} +# overwrites = None + + +class ExecutionPlan( + namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps executed"), + plot.Plotter, +): + """ + The result of the network's compilation phase. + + Note the execution plan's attributes are on purpose immutable tuples. + + :ivar net: + The parent :class:`Network` + + :ivar inputs: + A tuple with the names of the given inputs used to construct the plan. + + :ivar outputs: + A (possibly empy) tuple with the names of the requested outputs + used to construct the plan. + + :ivar dag: + The regular (not broken) *pruned* subgraph of net-graph. + + :ivar broken_edges: + Tuple of broken incoming edges to given data. + + :ivar steps: + The tuple of operation-nodes & *instructions* needed to evaluate + the given inputs & asked outputs, free memory and avoid overwritting + any given intermediate inputs. + :ivar executed: + An empty set to collect all operations that have been executed so far. 
+ """ + + @property + def broken_dag(self): + return nx.restricted_view(self.dag, nodes=(), edges=self.broken_edges) + + def _build_pydot(self, **kws): + from .plot import build_pydot + + clusters = None + if self.dag.nodes != self.net.graph.nodes: + clusters = {n: "after prunning" for n in self.dag.nodes} + mykws = { + "graph": self.net.graph, + "steps": self.steps, + "inputs": self.inputs, + "outputs": self.outputs, + "executed": self.executed, + "edge_props": { + e: {"color": "wheat", "penwidth": 2} for e in self.broken_edges + }, + "clusters": clusters, + } + mykws.update(kws) + + return build_pydot(**mykws) + + def __repr__(self): + steps = ["\n +--%s" % s for s in self.steps] + return "ExecutionPlan(inputs=%s, outputs=%s, steps:%s)" % ( + self.inputs, + self.outputs, + "".join(steps), + ) + + def get_data_node(self, name): + """ + Retuen the data node from a graph using its name, or None. + """ + node = self.dag.nodes[name] + if isinstance(node, DataPlaceholderNode): + return node + + def _can_schedule_operation(self, op): + """ + Determines if a Operation is ready to be scheduled for execution + + based on what has already been executed. + + :param op: + The Operation object to check + :return: + A boolean indicating whether the operation may be scheduled for + execution based on what has already been executed. + """ + # Use `broken_dag` to allow executing operations after given inputs + # regardless of whether their producers have yet to run. + dependencies = set( + n for n in nx.ancestors(self.broken_dag, op) if isinstance(n, Operation) + ) + return dependencies.issubset(self.executed) + + def _can_evict_value(self, name): + """ + Determines if a DataPlaceholderNode is ready to be deleted from solution. + + :param name: + The name of the data node to check + :return: + A boolean indicating whether the data node can be deleted or not. 
+ """ + data_node = self.get_data_node(name) + # Use `broken_dag` not to block a successor waiting for this data, + # since in any case will use a given input, not some pipe of this data. + return data_node and set(self.broken_dag.successors(data_node)).issubset( + self.executed + ) + + def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): + value_name = str(value_name) + if overwrites is not None: + overwrites[value_name] = solution[value_name] + solution[value_name] = inputs[value_name] + + def _call_operation(self, op, solution): + try: + return op._compute(solution) + except Exception as ex: + ## Annotate exception with debugging aid on errors. + # + err_aid = getattr(ex, "graphkit_aid", {}) + err_aid.setdefault("plan", self) + setattr(ex, "graphkit_aid", err_aid) + raise + + def _execute_thread_pool_barrier_method( + self, inputs, solution, overwrites, thread_pool_size=10 + ): + """ + This method runs the graph using a parallel pool of thread executors. + You may achieve lower total latency if your graph is sufficiently + sub divided into operations using this method. + """ + from multiprocessing.dummy import Pool + + # if we have not already created a thread_pool, create one + if not hasattr(self.net, "_thread_pool"): + self.net._thread_pool = Pool(thread_pool_size) + pool = self.net._thread_pool + + # with each loop iteration, we determine a set of operations that can be + # scheduled, then schedule them onto a thread pool, then collect their + # results onto a memory solution for use upon the next iteration. + while True: + + # the upnext list contains a list of operations for scheduling + # in the current round of scheduling + upnext = [] + for node in self.steps: + if ( + isinstance(node, Operation) + and self._can_schedule_operation(node) + and node not in self.executed + ): + upnext.append(node) + elif isinstance(node, DeleteInstruction): + # Only delete if all successors for the data node + # have been executed. 
+ # An optional need may not have a value in the solution. + if node in solution and self._can_evict_value(node): + log.debug("removing data '%s' from solution.", node) + del solution[node] + elif isinstance(node, PinInstruction): + # Always and repeatedely pin the value, even if not all + # providers of the data have executed. + # An optional need may not have a value in the solution. + if node in solution: + self._pin_data_in_solution(node, solution, inputs, overwrites) + + # stop if no nodes left to schedule, exit out of the loop + if len(upnext) == 0: + break + + ## TODO: accept pool from caller + done_iterator = pool.imap_unordered( + (lambda op: (op, self._call_operation(op, solution))), upnext + ) + + for op, result in done_iterator: + solution.update(result) + self.executed.add(op) + + def _execute_sequential_method(self, inputs, solution, overwrites): + """ + This method runs the graph one operation at a time in a single thread + """ + self.times = {} + for step in self.steps: + + if isinstance(step, Operation): + + log.debug("%sexecuting step: %s", "-" * 32, step.name) + + # time execution... + t0 = time.time() + + # compute layer outputs + layer_outputs = self._call_operation(step, solution) + + # add outputs to solution + solution.update(layer_outputs) + self.executed.add(step) + + # record execution time + t_complete = round(time.time() - t0, 5) + self.times[step.name] = t_complete + log.debug("step completion time: %s", t_complete) + + elif isinstance(step, DeleteInstruction): + # Cache value may be missing if it is optional. 
+ if step in solution: + log.debug("removing data '%s' from solution.", step) + del solution[step] + + elif isinstance(step, PinInstruction): + self._pin_data_in_solution(step, solution, inputs, overwrites) + else: + raise AssertionError("Unrecognized instruction.%r" % step) + + def execute(self, solution, overwrites=None, method=None): + """ + :param solution: + a mutable maping to collect the results and that must contain also + the given input values for at least the compulsory inputs that + were specified when the plan was built (but cannot enforce that!). + + :param overwrites: + (optional) a mutable dict to collect calculated-but-discarded values + because they were "pinned" by input vaules. + If missing, the overwrites values are simply discarded. + """ + # Clean executed operation from any previous execution. + self.executed.clear() + + # choose a method of execution + executor = ( + self._execute_thread_pool_barrier_method + if method == "parallel" + else self._execute_sequential_method + ) + + # clone and keep orignal inputs in solution intact + executor(dict(solution), solution, overwrites) + + # return it, but caller can also see the results in `solution` dict. + return solution + + class Network(plot.Plotter): """ Assemble operations & data into a directed-acyclic-graph (DAG) to run them. @@ -199,70 +453,6 @@ def add_op(self, operation): kw["sideffect"] = True self.graph.add_edge(operation, DataPlaceholderNode(p), **kw) - def _build_execution_steps(self, dag, inputs, outputs): - """ - Create the list of operation-nodes & *instructions* evaluating all - - operations & instructions needed a) to free memory and b) avoid - overwritting given intermediate inputs. - - :param dag: - The original dag, pruned; not broken. - :param outputs: - outp-names to decide whether to add (and which) del-instructions - - In the list :class:`DeleteInstructions` steps (DA) are inserted between - operation nodes to reduce the memory footprint of solution. 
- A DA is inserted whenever a *need* is not used by any other *operation* - further down the DAG. - Note that since the `solutions` are not shared across `compute()` calls, - any memory-reductions are for as long as a single computation runs. - - """ - - steps = [] - - # create an execution order such that each layer's needs are provided. - ordered_nodes = iset(nx.topological_sort(dag)) - - # Add Operations evaluation steps, and instructions to free and "pin" - # data. - for i, node in enumerate(ordered_nodes): - - if isinstance(node, DataPlaceholderNode): - if node in inputs and dag.pred[node]: - # Command pinning only when there is another operation - # generating this data as output. - steps.append(PinInstruction(node)) - - elif isinstance(node, Operation): - steps.append(node) - - # Keep all values in solution if not specific outputs asked. - if not outputs: - continue - - # Add instructions to delete predecessors as possible. A - # predecessor may be deleted if it is a data placeholder that - # is no longer needed by future Operations. - for need in self.graph.pred[node]: - log.debug("checking if node %s can be deleted", need) - for future_node in ordered_nodes[i + 1 :]: - if ( - isinstance(future_node, Operation) - and need in future_node.needs - ): - break - else: - if need not in outputs: - log.debug(" adding delete instruction for %s", need) - steps.append(DeleteInstruction(need)) - - else: - raise AssertionError("Unrecognized network graph node %r" % node) - - return steps - def _collect_unsatisfied_operations(self, dag, inputs): """ Traverse topologically sorted dag to collect un-satisfied operations. @@ -377,7 +567,74 @@ def _prune_graph(self, outputs, inputs): # Clone it so that it is picklable. 
pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied).copy() - return pruned_dag, broken_edges + return pruned_dag, broken_edges + + def _build_execution_steps(self, dag, inputs, outputs): + """ + Create the list of operation-nodes & *instructions* evaluating all + + operations & instructions needed a) to free memory and b) avoid + overwritting given intermediate inputs. + + :param dag: + The original dag, pruned; not broken. + :param outputs: + outp-names to decide whether to add (and which) del-instructions + + In the list :class:`DeleteInstructions` steps (DA) are inserted between + operation nodes to reduce the memory footprint of solution. + A DA is inserted whenever a *need* is not used by any other *operation* + further down the DAG. + Note that since the `solutions` are not shared across `compute()` calls, + any memory-reductions are for as long as a single computation runs. + + """ + + steps = [] + + # create an execution order such that each layer's needs are provided. + ordered_nodes = iset(nx.topological_sort(dag)) + + # Add Operations evaluation steps, and instructions to free and "pin" + # data. + for i, node in enumerate(ordered_nodes): + + if isinstance(node, DataPlaceholderNode): + if node in inputs and dag.pred[node]: + # Command pinning only when there is another operation + # generating this data as output. + steps.append(PinInstruction(node)) + + elif isinstance(node, Operation): + steps.append(node) + + # Keep all values in solution if not specific outputs asked. + if not outputs: + continue + + # Add instructions to delete predecessors as possible. A + # predecessor may be deleted if it is a data placeholder that + # is no longer needed by future Operations. + # It shouldn't make a difference if it were the broken dag + # bc these are preds of data (provides), and we scan here + # preds of ops (need). 
+ for need in dag.pred[node]: + log.debug("checking if node %s can be deleted", need) + for future_node in ordered_nodes[i + 1 :]: + if ( + isinstance(future_node, Operation) + and need in future_node.needs + ): + break + else: + if need not in outputs: + log.debug(" adding delete instruction for %s", need) + steps.append(DeleteInstruction(need)) + + else: + raise AssertionError("Unrecognized network graph node %r" % node) + + return steps def compile(self, inputs=(), outputs=()): """ @@ -487,256 +744,3 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) setattr(ex, "graphkit_aid", err_aid) raise - -class ExecutionPlan( - namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps executed"), - plot.Plotter, -): - """ - The result of the network's compilation phase. - - Note the execution plan's attributes are on purpose immutable tuples. - - :ivar net: - The parent :class:`Network` - - :ivar inputs: - A tuple with the names of the given inputs used to construct the plan. - - :ivar outputs: - A (possibly empy) tuple with the names of the requested outputs - used to construct the plan. - - :ivar dag: - The regular (not broken) *pruned* subgraph of net-graph. - - :ivar broken_edges: - Tuple of broken incoming edges to given data. - - :ivar steps: - The tuple of operation-nodes & *instructions* needed to evaluate - the given inputs & asked outputs, free memory and avoid overwritting - any given intermediate inputs. - :ivar executed: - An empty set to collect all operations that have been executed so far. 
- """ - - @property - def broken_dag(self): - return nx.restricted_view(self.dag, nodes=(), edges=self.broken_edges) - - def _build_pydot(self, **kws): - from .plot import build_pydot - - clusters = None - if self.dag.nodes != self.net.graph.nodes: - clusters = {n: "after prunning" for n in self.dag.nodes} - mykws = { - "graph": self.net.graph, - "steps": self.steps, - "inputs": self.inputs, - "outputs": self.outputs, - "executed": self.executed, - "edge_props": { - e: {"color": "wheat", "penwidth": 2} for e in self.broken_edges - }, - "clusters": clusters, - } - mykws.update(kws) - - return build_pydot(**mykws) - - def __repr__(self): - steps = ["\n +--%s" % s for s in self.steps] - return "ExecutionPlan(inputs=%s, outputs=%s, steps:%s)" % ( - self.inputs, - self.outputs, - "".join(steps), - ) - - def get_data_node(self, name): - """ - Retuen the data node from a graph using its name, or None. - """ - node = self.dag.nodes[name] - if isinstance(node, DataPlaceholderNode): - return node - - def _can_schedule_operation(self, op): - """ - Determines if a Operation is ready to be scheduled for execution - - based on what has already been executed. - - :param op: - The Operation object to check - :return: - A boolean indicating whether the operation may be scheduled for - execution based on what has already been executed. - """ - # Use `broken_dag` to allow executing operations after given inputs - # regardless of whether their producers have yet to run. - dependencies = set( - n for n in nx.ancestors(self.broken_dag, op) if isinstance(n, Operation) - ) - return dependencies.issubset(self.executed) - - def _can_evict_value(self, name): - """ - Determines if a DataPlaceholderNode is ready to be deleted from solution. - - :param name: - The name of the data node to check - :return: - A boolean indicating whether the data node can be deleted or not. 
- """ - data_node = self.get_data_node(name) - # Use `broken_dag` not to block a successor waiting for this data, - # since in any case will use a given input, not some pipe of this data. - return data_node and set(self.broken_dag.successors(data_node)).issubset( - self.executed - ) - - def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): - value_name = str(value_name) - if overwrites is not None: - overwrites[value_name] = solution[value_name] - solution[value_name] = inputs[value_name] - - def _call_operation(self, op, solution): - try: - return op._compute(solution) - except Exception as ex: - ## Annotate exception with debugging aid on errors. - # - err_aid = getattr(ex, "graphkit_aid", {}) - err_aid.setdefault("plan", self) - setattr(ex, "graphkit_aid", err_aid) - raise - - def _execute_thread_pool_barrier_method( - self, inputs, solution, overwrites, thread_pool_size=10 - ): - """ - This method runs the graph using a parallel pool of thread executors. - You may achieve lower total latency if your graph is sufficiently - sub divided into operations using this method. - """ - from multiprocessing.dummy import Pool - - # if we have not already created a thread_pool, create one - if not hasattr(self.net, "_thread_pool"): - self.net._thread_pool = Pool(thread_pool_size) - pool = self.net._thread_pool - - # with each loop iteration, we determine a set of operations that can be - # scheduled, then schedule them onto a thread pool, then collect their - # results onto a memory solution for use upon the next iteration. - while True: - - # the upnext list contains a list of operations for scheduling - # in the current round of scheduling - upnext = [] - for node in self.steps: - if ( - isinstance(node, Operation) - and self._can_schedule_operation(node) - and node not in self.executed - ): - upnext.append(node) - elif isinstance(node, DeleteInstruction): - # Only delete if all successors for the data node - # have been executed. 
- # An optional need may not have a value in the solution. - if node in solution and self._can_evict_value(node): - log.debug("removing data '%s' from solution.", node) - del solution[node] - elif isinstance(node, PinInstruction): - # Always and repeatedely pin the value, even if not all - # providers of the data have executed. - # An optional need may not have a value in the solution. - if node in solution: - self._pin_data_in_solution(node, solution, inputs, overwrites) - - # stop if no nodes left to schedule, exit out of the loop - if len(upnext) == 0: - break - - ## TODO: accept pool from caller - done_iterator = pool.imap_unordered( - (lambda op: (op, self._call_operation(op, solution))), upnext - ) - - for op, result in done_iterator: - solution.update(result) - self.executed.add(op) - - def _execute_sequential_method(self, inputs, solution, overwrites): - """ - This method runs the graph one operation at a time in a single thread - """ - self.times = {} - for step in self.steps: - - if isinstance(step, Operation): - - log.debug("%sexecuting step: %s", "-" * 32, step.name) - - # time execution... - t0 = time.time() - - # compute layer outputs - layer_outputs = self._call_operation(step, solution) - - # add outputs to solution - solution.update(layer_outputs) - self.executed.add(step) - - # record execution time - t_complete = round(time.time() - t0, 5) - self.times[step.name] = t_complete - log.debug("step completion time: %s", t_complete) - - elif isinstance(step, DeleteInstruction): - # Cache value may be missing if it is optional. 
- if step in solution: - log.debug("removing data '%s' from solution.", step) - del solution[step] - - elif isinstance(step, PinInstruction): - self._pin_data_in_solution(step, solution, inputs, overwrites) - else: - raise AssertionError("Unrecognized instruction.%r" % step) - - def execute(self, solution, overwrites=None, method=None): - """ - :param solution: - a mutable maping to collect the results and that must contain also - the given input values for at least the compulsory inputs that - were specified when the plan was built (but cannot enforce that!). - - :param overwrites: - (optional) a mutable dict to collect calculated-but-discarded values - because they were "pinned" by input vaules. - If missing, the overwrites values are simply discarded. - """ - # Clean executed operation from any previous execution. - self.executed.clear() - - # choose a method of execution - executor = ( - self._execute_thread_pool_barrier_method - if method == "parallel" - else self._execute_sequential_method - ) - - # clone and keep orignal inputs in solution intact - executor(dict(solution), solution, overwrites) - - # return it, but caller can also see the results in `solution` dict. - return solution - - -# TODO: maybe class Solution(object): -# values = {} -# overwrites = None From 6b22e19fc7d65df6121b1a45780a1b0e1b477e0f Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Fri, 11 Oct 2019 23:39:20 +0300 Subject: [PATCH 143/167] REFACT(net): Delete-->Evict, -Placeholder, privatize node-classes --- CHANGES.rst | 4 +-- graphkit/network.py | 79 +++++++++++++++++++++---------------------- graphkit/plot.py | 8 ++--- test/test_graphkit.py | 28 +++++++-------- 4 files changed, 58 insertions(+), 61 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 381f3338..d60a2711 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,7 +34,7 @@ Network: + ENH(:gh:`26`): "Pin* input values that may be overriten by calculated ones. 
- This required the introduction of the new :class:`PinInstruction` in + This required the introduction of the new :class:`_PinInstruction` in the execution plan. + FIX(:gh:`23`, :gh:`22`-2.4.3): Keep consistent order of ``networkx.DiGraph`` @@ -63,7 +63,7 @@ Plotting: - execution-steps & order - - delete & pin instructions + - evict & pin instructions - given inputs & asked outputs - solution values (just if they are present) - "optional" needs & broken links during pruning diff --git a/graphkit/network.py b/graphkit/network.py index 39963685..797e497d 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -18,7 +18,7 @@ :attr:`Network.graph` A ``networkx`` graph (yet a DAG) containing interchanging layers of - :class:`Operation` and :class:`DataPlaceholderNode` nodes. + :class:`Operation` and :class:`_DataNode` nodes. They are layed out and connected by repeated calls of :meth:`~Network.add_OP`. @@ -36,7 +36,7 @@ :attr:`ExecutionPlan.steps` It is the list of the operation-nodes only from the dag (above), topologically sorted, and interspersed with - *instructions steps* needed to complete the run. + *instruction steps* needed to complete the run. It is built by :meth:`~Network._build_execution_steps()` based on the subgraph dag extracted above. The containing :class:`ExecutionPlan.steps` instance is cached @@ -44,11 +44,11 @@ The *instructions* items achieve the following: - - :class:`DeleteInstruction`: delete items from `solution` as soon as + - :class:`_EvictInstruction`: evicts items from `solution` as soon as they are not needed further down the dag, to reduce memory footprint while computing. - - :class:`PinInstruction`: avoid overwritting any given intermediate + - :class:`_PinInstruction`: avoid overwritting any given intermediate inputs, and still allow their providing operations to run (because they are needed for their other outputs). 
@@ -97,18 +97,18 @@ from networkx import OrderedDiGraph as DiGraph -class DataPlaceholderNode(str): +class _DataNode(str): """ Dag node naming a data-value produced or required by an operation. """ def __repr__(self): - return 'DataPlaceholderNode("%s")' % self + return 'DataNone("%s")' % self -class DeleteInstruction(str): +class _EvictInstruction(str): """ - Execution step to delete a computed value from the `solution`. + Execution step to evict a computed value from the `solution`. It's a step in :attr:`ExecutionPlan.steps` for the data-node `str` that frees its data-value from `solution` after it is no longer needed, @@ -116,12 +116,12 @@ class DeleteInstruction(str): """ def __repr__(self): - return 'DeleteInstruction("%s")' % self + return 'EvictInstruction("%s")' % self -class PinInstruction(str): +class _PinInstruction(str): """ - Execution step to replace a computed value in the `solution` from the inputs, + Execution step to overwrite a computed value in the `solution` from the inputs, and to store the computed one in the ``overwrites`` instead (both `solution` & ``overwrites`` are local-vars in :meth:`~Network.compute()`). @@ -212,7 +212,7 @@ def get_data_node(self, name): Retuen the data node from a graph using its name, or None. """ node = self.dag.nodes[name] - if isinstance(node, DataPlaceholderNode): + if isinstance(node, _DataNode): return node def _can_schedule_operation(self, op): @@ -236,12 +236,12 @@ def _can_schedule_operation(self, op): def _can_evict_value(self, name): """ - Determines if a DataPlaceholderNode is ready to be deleted from solution. + Determines if a _DataNode is ready to be evicted from solution. :param name: The name of the data node to check :return: - A boolean indicating whether the data node can be deleted or not. + A boolean indicating whether the data node can be evicted or not. 
""" data_node = self.get_data_node(name) # Use `broken_dag` not to block a successor waiting for this data, @@ -297,14 +297,14 @@ def _execute_thread_pool_barrier_method( and node not in self.executed ): upnext.append(node) - elif isinstance(node, DeleteInstruction): - # Only delete if all successors for the data node + elif isinstance(node, _EvictInstruction): + # Only evict if all successors for the data node # have been executed. # An optional need may not have a value in the solution. if node in solution and self._can_evict_value(node): log.debug("removing data '%s' from solution.", node) del solution[node] - elif isinstance(node, PinInstruction): + elif isinstance(node, _PinInstruction): # Always and repeatedely pin the value, even if not all # providers of the data have executed. # An optional need may not have a value in the solution. @@ -350,13 +350,13 @@ def _execute_sequential_method(self, inputs, solution, overwrites): self.times[step.name] = t_complete log.debug("step completion time: %s", t_complete) - elif isinstance(step, DeleteInstruction): + elif isinstance(step, _EvictInstruction): # Cache value may be missing if it is optional. 
if step in solution: log.debug("removing data '%s' from solution.", step) del solution[step] - elif isinstance(step, PinInstruction): + elif isinstance(step, _PinInstruction): self._pin_data_in_solution(step, solution, inputs, overwrites) else: raise AssertionError("Unrecognized instruction.%r" % step) @@ -444,14 +444,14 @@ def add_op(self, operation): kw["optional"] = True if isinstance(n, sideffect): kw["sideffect"] = True - self.graph.add_edge(DataPlaceholderNode(n), operation, **kw) + self.graph.add_edge(_DataNode(n), operation, **kw) # add nodes and edges to graph describing what this layer provides for p in operation.provides: kw = {} if isinstance(n, sideffect): kw["sideffect"] = True - self.graph.add_edge(operation, DataPlaceholderNode(p), **kw) + self.graph.add_edge(operation, _DataNode(p), **kw) def _collect_unsatisfied_operations(self, dag, inputs): """ @@ -498,7 +498,7 @@ def _collect_unsatisfied_operations(self, dag, inputs): else: # Prune operations with partial inputs. unsatisfied.append(node) - elif isinstance(node, (DataPlaceholderNode, str)): # `str` are givens + elif isinstance(node, (_DataNode, str)): # `str` are givens if node in ok_data: # mark satisfied-needs on all future operations for future_op in dag.adj[node]: @@ -579,15 +579,13 @@ def _build_execution_steps(self, dag, inputs, outputs): :param dag: The original dag, pruned; not broken. :param outputs: - outp-names to decide whether to add (and which) del-instructions - - In the list :class:`DeleteInstructions` steps (DA) are inserted between - operation nodes to reduce the memory footprint of solution. - A DA is inserted whenever a *need* is not used by any other *operation* - further down the DAG. - Note that since the `solutions` are not shared across `compute()` calls, - any memory-reductions are for as long as a single computation runs. 
+ outp-names to decide whether to add (and which) evict-instructions + Instances of :class:`_EvictInstructions` are inserted in `steps` between + operation nodes to reduce the memory footprint of solutions while + the computation is running. + An evict-instruction is inserted whenever a *need* is not used + by any other *operation* further down the DAG. """ steps = [] @@ -599,11 +597,11 @@ def _build_execution_steps(self, dag, inputs, outputs): # data. for i, node in enumerate(ordered_nodes): - if isinstance(node, DataPlaceholderNode): + if isinstance(node, _DataNode): if node in inputs and dag.pred[node]: - # Command pinning only when there is another operation + # Add a pin-instruction only when there is another operation # generating this data as output. - steps.append(PinInstruction(node)) + steps.append(_PinInstruction(node)) elif isinstance(node, Operation): steps.append(node) @@ -612,14 +610,14 @@ def _build_execution_steps(self, dag, inputs, outputs): if not outputs: continue - # Add instructions to delete predecessors as possible. A - # predecessor may be deleted if it is a data placeholder that + # Add instructions to evict predecessors as possible. A + # predecessor may be evicted if it is a data placeholder that # is no longer needed by future Operations. # It shouldn't make a difference if it were the broken dag # bc these are preds of data (provides), and we scan here # preds of ops (need). 
for need in dag.pred[node]: - log.debug("checking if node %s can be deleted", need) + log.debug("checking if node %s can be evicted", need) for future_node in ordered_nodes[i + 1 :]: if ( isinstance(future_node, Operation) @@ -628,8 +626,8 @@ def _build_execution_steps(self, dag, inputs, outputs): break else: if need not in outputs: - log.debug(" adding delete instruction for %s", need) - steps.append(DeleteInstruction(need)) + log.debug(" adding evict-instruction for %s", need) + steps.append(_EvictInstruction(need)) else: raise AssertionError("Unrecognized network graph node %r" % node) @@ -727,9 +725,9 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) if outputs: # Filter outputs to just return what's requested. - # Otherwise, eturn the whole solution as output, + # Otherwise, return the whole solution as output, # including input and intermediate data nodes. - # TODO: assert no other outputs exists due to DelInstructs. + # TODO: assert no other outputs exists due to evict-instructions. solution = dict(i for i in solution.items() if i[0] in outputs) return solution @@ -743,4 +741,3 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) err_aid.setdefault("solution", locs.get("solution")) setattr(ex, "graphkit_aid", err_aid) raise - diff --git a/graphkit/plot.py b/graphkit/plot.py index 1b5daa0e..126e3e93 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -76,7 +76,7 @@ def plot(self, filename=None, show=False, **kws): square intermediate data, neither given nor asked. red frame - delete-instruction, to free up memory. + evict-instruction, to free up memory. blue frame pinned-instruction, not to overwrite intermediate inputs. 
filled @@ -189,7 +189,7 @@ def build_pydot( import pydot from .base import NetworkOperation, Operation from .modifiers import optional - from .network import DeleteInstruction, PinInstruction + from .network import _EvictInstruction, _PinInstruction _monkey_patch_for_jupyter(pydot) @@ -231,8 +231,8 @@ def get_node_name(a): # FrameColor change by step type if steps and nx_node in steps: choice = _merge_conditions( - _is_class_value_in_list(steps, DeleteInstruction, nx_node), - _is_class_value_in_list(steps, PinInstruction, nx_node), + _is_class_value_in_list(steps, _EvictInstruction, nx_node), + _is_class_value_in_list(steps, _PinInstruction, nx_node), ) # 0 is singled out because `nx_node` exists in `steps`. color = "NOPE #990000 blue purple".split()[choice] diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 2184ee9e..b9972ad0 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -12,7 +12,7 @@ import graphkit.modifiers as modifiers import graphkit.network as network from graphkit import Operation, compose, operation -from graphkit.network import DeleteInstruction +from graphkit.network import _EvictInstruction def scream(*args, **kwargs): @@ -654,30 +654,30 @@ def test_optional_per_function_with_same_output(): assert pipeline(named_inputs, ["a+-b"]) == {"a+-b": -9} -def test_deleted_optional(): - # Test that DeleteInstructions included for optionals do not raise +def test_evicted_optional(): + # Test that _EvictInstructions included for optionals do not raise # exceptions when the corresponding input is not prodided. # Function to add two values plus an optional third value. def addplusplus(a, b, c=0): return a + b + c - # Here, a DeleteInstruction will be inserted for the optional need 'c'. + # Here, a _EvictInstruction will be inserted for the optional need 'c'. 
sum_op1 = operation( name="sum_op1", needs=["a", "b", modifiers.optional("c")], provides="sum1" )(addplusplus) sum_op2 = operation(name="sum_op2", needs=["sum1", "sum1"], provides="sum2")(add) net = compose(name="test_net")(sum_op1, sum_op2) - # DeleteInstructions are used only when a subset of outputs are requested. + # _EvictInstructions are used only when a subset of outputs are requested. results = net({"a": 4, "b": 3}, outputs=["sum2"]) assert "sum2" in results -def test_deleteinstructs_vary_with_inputs(): - # Check #21: DeleteInstructions positions vary when inputs change. - def count_deletions(steps): - return sum(isinstance(n, DeleteInstruction) for n in steps) +def test_evict_instructions_vary_with_inputs(): + # Check #21: _EvictInstructions positions vary when inputs change. + def count_evictions(steps): + return sum(isinstance(n, _EvictInstruction) for n in steps) pipeline = compose(name="pipeline")( operation(name="a free without b", needs=["a"], provides=["aa"])(identity), @@ -709,21 +709,21 @@ def count_deletions(steps): assert res == filtdict(exp, "asked") # ok steps22 = pipeline.compile(inp, ["asked"]).steps - # When no outs, no del-instructs. + # When no outs, no evict-instructions. 
assert steps11 != steps12 - assert count_deletions(steps11) == 0 + assert count_evictions(steps11) == 0 assert steps21 != steps22 - assert count_deletions(steps21) == 0 + assert count_evictions(steps21) == 0 # Check steps vary with inputs # # FAILs in v1.2.4 + #18, PASS in #26 assert steps11 != steps21 - # Check deletes vary with inputs + # Check evicts vary with inputs # # FAILs in v1.2.4 + #18, PASS in #26 - assert count_deletions(steps12) != count_deletions(steps22) + assert count_evictions(steps12) != count_evictions(steps22) @pytest.mark.slow From b1157b3d79f87af90f1e2e6335959957313a9a03 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 12 Oct 2019 12:07:19 +0300 Subject: [PATCH 144/167] FIX(plot): unquoted `graph` labelled graphs crash pydot/pydot#111 --- graphkit/plot.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/graphkit/plot.py b/graphkit/plot.py index 126e3e93..2528507f 100644 --- a/graphkit/plot.py +++ b/graphkit/plot.py @@ -216,12 +216,15 @@ def append_any_clusters(dot): for cluster in new_clusters.values(): dot.add_subgraph(cluster) + def quote_dot_kws(word): + return "'%s'" % word if word in pydot.dot_keywords else word + def get_node_name(a): if isinstance(a, Operation): - return a.name - return a + a = a.name + return quote_dot_kws(a) - dot = pydot.Dot(graph_type="digraph", label=title, fontname="italic") + dot = pydot.Dot(graph_type="digraph", label=quote_dot_kws(title), fontname="italic") # draw nodes for nx_node in graph.nodes: @@ -250,7 +253,7 @@ def get_node_name(a): kw["style"] = "filled" kw["fillcolor"] = fill_color # kw["tooltip"] = str(solution.get(nx_node)) # not working :-() - node = pydot.Node(name=nx_node, shape=shape, **kw) + node = pydot.Node(name=quote_dot_kws(nx_node), shape=shape, **kw) else: # Operation kw = {"fontname": "italic"} @@ -260,7 +263,7 @@ def get_node_name(a): if executed and nx_node in executed: kw["style"] = "filled" kw["fillcolor"] = fill_color - node = 
pydot.Node(name=nx_node.name, shape=shape, **kw) + node = pydot.Node(name=quote_dot_kws(nx_node.name), shape=shape, **kw) _apply_user_props(node, node_props, key=node.get_name()) From 92608eb7969eb5ddb29f492b03b948bbfebd77f4 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 12 Oct 2019 12:32:44 +0300 Subject: [PATCH 145/167] fix(doc): 2nd full-plot in README not matching sample code --- README.md | 2 +- docs/source/images/executed_3ops.svg | 146 +++++++++++++-------------- 2 files changed, 73 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index eeaa14c1..5a64d292 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ using these methods: ```python graphop.plot(show=True) # open a matplotlib window -graphop.plot("graphop.svg") # other supported formats: png, jpg, pdf, ... +graphop.plot("graphop.svg") # other supported formats: png, jpg, pdf, ... graphop.plot() # without arguments return a pydot.DOT object graphop.plot(solution=out) # annotate graph with solution values ``` diff --git a/docs/source/images/executed_3ops.svg b/docs/source/images/executed_3ops.svg index d8ffa4e3..6d630d58 100644 --- a/docs/source/images/executed_3ops.svg +++ b/docs/source/images/executed_3ops.svg @@ -4,142 +4,140 @@ - - + + G - -graphop - + +graphop + +cluster_after prunning + +after prunning + + +abspow1 + +abspow1 + + + +abs_a_minus_ab_cubed + +abs_a_minus_ab_cubed + + + +abspow1->abs_a_minus_ab_cubed + + + + + a - -a + +a - + mul1 - -mul1 + +mul1 a->mul1 - - + + - + ab - -ab + +ab a->ab - - -4 + + +4 - + sub1 - -sub1 + +sub1 a->sub1 - - + + - + b - -b + +b mul1->b - - -1 + + +1 mul1->ab - - + + b->mul1 - - + + b->sub1 - - -2 + + +2 ab->sub1 - - - - - -abspow1 - -abspow1 - - - -ab->abspow1 - - -5 + + sub1->a - - -3 + + +3 - + a_minus_ab - -a_minus_ab + +a_minus_ab sub1->a_minus_ab - - + + a_minus_ab->abspow1 - - - - - -abs_a_minus_ab_cubed - -abs_a_minus_ab_cubed - - - -abspow1->abs_a_minus_ab_cubed - - + + From 
996eed1e38f3779a2cfb668269eb04b994148b9b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 12 Oct 2019 12:35:34 +0300 Subject: [PATCH 146/167] fix(doc.TC): +2 index.rst doctests were just comments --- docs/source/index.rst | 48 ++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index c7fa98a0..ca25f433 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,34 +51,30 @@ OR with dependencies for plotting support (and you need to install `Graphviz Here's a Python script with an example GraphKit computation graph that produces multiple outputs (``a * b``, ``a - a * b``, and ``abs(a - a * b) ** 3``):: - from operator import mul, sub - from graphkit import compose, operation + >>> from operator import mul, sub + >>> from graphkit import compose, operation # Computes |a|^p. - def abspow(a, p): - c = abs(a) ** p - return c - - # Compose the mul, sub, and abspow operations into a computation graph. - graphop = compose(name="graphop")( - operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), - operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), - operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) - ) - - # Run the graph-operation and request all of the outputs. - out = graphop({'a': 2, 'b': 5}) - - # Prints "{'a': 2, 'a_minus_ab': -8, 'b': 5, 'ab': 10, 'abs_a_minus_ab_cubed': 512}". - print(out) - - # Run the graph-operation and request a subset of the outputs. - out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) - - # Prints "{'a_minus_ab': -8}". - print(out) - -As you can see, any function can be used as an operation in GraphKit, + >>> def abspow(a, p): + ... c = abs(a) ** p + ... return c + + >>> # Compose the mul, sub, and abspow operations into a computation graph. + >>> graphop = compose(name="graphop")( + ... 
operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + ... operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), + ... operation(name="abspow1", needs=["a_minus_ab"], provides=["abs_a_minus_ab_cubed"], params={"p": 3})(abspow) + ... ) + + >>> # Run the graph-operation and request all of the outputs. + >>> graphop({'a': 2, 'b': 5}) + {'a': 2, 'b': 5, 'ab': 10, 'a_minus_ab': -8, 'abs_a_minus_ab_cubed': 512} + + >>> # Run the graph-operation and request a subset of the outputs. + >>> graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) + {'a_minus_ab': -8} + +As you can see, any function can be used as an operation in GraphKit, even ones imported from system modules! From 3742a5691abb244426514ce4dac1ebfd9eabc3f8 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sat, 12 Oct 2019 12:44:31 +0300 Subject: [PATCH 147/167] test(net): check Pin & Evict combine appropriately --- test/test_graphkit.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index b9972ad0..aba5dde5 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -321,7 +321,7 @@ def test_pruning_not_overrides_given_intermediate(): def test_pruning_multiouts_not_override_intermediates1(): # Test #25: v.1.2.4 overwrites intermediate data when a previous operation # must run for its other outputs (outputs asked or not) - pipeline = compose(name="pipeline")( + pipeline = compose(name="graph")( operation(name="must run", needs=["a"], provides=["overriden", "calced"])( lambda x: (x, 2 * x) ), @@ -340,6 +340,12 @@ def test_pruning_multiouts_not_override_intermediates1(): # - on #18(unsatisfied) + #23(ordered-sets) with empty result. # FIXED on #26 assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") + # Plan must contain "overriden" step twice, for pin & evict. + # Plot it to see, or check https://github.com/huyng/graphkit/pull/1#discussion_r334226396. 
+ datasteps = [s for s in pipeline.net.last_plan.steps if s == "overriden"] + assert len(datasteps) == 2 + assert isinstance(datasteps[0], network._PinInstruction) + assert isinstance(datasteps[1], network._EvictInstruction) ## Test OVERWITES # From 499426d7a49e0665640ad8a96a16cade6f522cc8 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 04:46:13 +0300 Subject: [PATCH 148/167] fix(site): travs pass now with new site check --- docs/source/composition.rst | 8 +- docs/source/images/bigger_example_graph.svg | 156 ++++++++++++++++++++ 2 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 docs/source/images/bigger_example_graph.svg diff --git a/docs/source/composition.rst b/docs/source/composition.rst index c770f9d3..32a2584d 100644 --- a/docs/source/composition.rst +++ b/docs/source/composition.rst @@ -86,7 +86,9 @@ This can be useful if you have a graph-operation that accepts alternative forms Adding on to an existing computation graph ------------------------------------------ -Sometimes you will have an existing computation graph to which you want to add operations. This is simple, since ``compose`` can compose whole graphs along with individual ``operation`` instances. For example, if we have ``graph`` as above, we can add another operation to it to create a new graph:: +Sometimes you will have an existing computation graph to which you want to add operations. +This is simple, since ``compose`` can compose whole graphs along with individual ``operation`` instances. +For example, if we have ``graph`` as above, we can add another operation to it to create a new graph:: >>> # Add another subtraction operation to the graph. 
>>> bigger_graph = compose(name="bigger_graph")( @@ -99,7 +101,9 @@ Sometimes you will have an existing computation graph to which you want to add o >>> sol {'a_minus_ab_minus_c': -13} -This yields a graph which looks like this (see :ref:`plotting`): +This yields a graph which looks like this (see :ref:`plotting`):: + + >>> bigger_graph.plot('bigger_example_graph.svg', solution=sol) # doctest: +SKIP .. image:: images/bigger_example_graph.svg diff --git a/docs/source/images/bigger_example_graph.svg b/docs/source/images/bigger_example_graph.svg new file mode 100644 index 00000000..462c3cdb --- /dev/null +++ b/docs/source/images/bigger_example_graph.svg @@ -0,0 +1,156 @@ + + + + + + +G + +bigger_graph + +cluster_after prunning + +after prunning + + + +ab + +ab + + + +abs_a_minus_ab_cubed + +abs_a_minus_ab_cubed + + + +a + +a + + + +graphop + +graphop + + + +a->graphop + + + + + +b + +b + + + +a->b + + +2 + + + +graphop->ab + + + + + +graphop->abs_a_minus_ab_cubed + + + + + +graphop->a + + +1 + + + +a_minus_ab + +a_minus_ab + + + +graphop->a_minus_ab + + + + + +b->graphop + + + + + +sub2 + +sub2 + + + +b->sub2 + + +3 + + + +a_minus_ab->sub2 + + + + + +c + +c + + + +a_minus_ab->c + + +5 + + + +sub2->a_minus_ab + + +4 + + + +a_minus_ab_minus_c + +a_minus_ab_minus_c + + + +sub2->a_minus_ab_minus_c + + + + + +c->sub2 + + + + + From f4abf154146310101ba1b5bf0d1f9aa4be938b15 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 03:17:30 +0300 Subject: [PATCH 149/167] ENH(JETSAM): generify EX-ANNOTATION mechaism ... + FEAT(base): `jetsam()` machinery to colect from locals(). + enh(net): jestam in compute(). + enh: shorten annotation-names. + doc(net): explain why eviction cannot clan all solution (drop old todo). + refact(func): homogenize `resultS` in _compute() with other ops. + DROP(DOC): cloned debugging sections were not removed from composition.rst. + doc(debug): minot enhancements. 
--- docs/source/composition.rst | 68 ----------------- docs/source/plotting.rst | 28 ++++--- graphkit/base.py | 144 ++++++++++++++++++++++++++++++------ graphkit/functional.py | 99 ++++++++++++------------- graphkit/network.py | 73 ++++++++---------- test/test_base.py | 118 +++++++++++++++++++++++++++++ 6 files changed, 337 insertions(+), 193 deletions(-) create mode 100644 test/test_base.py diff --git a/docs/source/composition.rst b/docs/source/composition.rst index 32a2584d..73239487 100644 --- a/docs/source/composition.rst +++ b/docs/source/composition.rst @@ -140,71 +140,3 @@ As always, we can run computations with this graph by simply calling it:: >>> merged_graph({'a': 2, 'b': 5, 'c': 5}, outputs=["cab"]) {'cab': 50} - - - -Errors & debugging ------------------- - -If an operation fails, the original exception gets annotated -with the folllowing properties, as a debug aid: - ->>> from pprint import pprint - ->>> def scream(*args): -... raise ValueError("Wrong!") - ->>> try: -... compose("errgraph")( -... operation(name="screamer", needs=['a'], provides=["foo"])(scream) -... )({'a': None}) -... except ValueError as ex: -... pprint(ex.graphkit_aid) -{'network': - ... 
- 'operation': FunctionalOperation(name='screamer', needs=['a'], provides=['foo']), - 'operation_args': {'args': [None], 'kwargs': {}}, - 'operation_fnouts': None, - 'operation_outs': None, - 'operation_results': None, - 'plan': ExecutionPlan(inputs=('a',), outputs=(), steps: - +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])), - 'solution': {'a': None}} - - -The following annotated attributes might have values on an exception ``ex``: - -``ex.network`` - the innermost network owning the failed operation/function - -``ex.plan`` - the innermost plan that executing when a operation crashed - -``ex.operation`` - the innermost operation that failed - -``ex.operation_args`` - either a 2-tuple ``(args, kwargs)`` or just the ``args`` fed to the operation - -``ex.operation_fnouts`` - the names of the outputs the function was expected to return - -``ex.operation_outs`` - the names eventually the graph needed from the operation - (a subset of the above) - -``ex.operation_results`` - the values dict, if any; it maybe a *zip* of the provides - with the actual returned values of the function, ot the raw results. - -.. note:: - The :ref:`plotting` capabilities, along with the above annotation of exceptions - with the internal state of plan/operation often renders a debugger session - unnecessary. But since the state of the annotated values might be incomple, - you may not always avoid one. - - -Execution internals -------------------- -.. automodule:: graphkit.network - :noindex: diff --git a/docs/source/plotting.rst b/docs/source/plotting.rst index 77530bc9..a5981c6e 100644 --- a/docs/source/plotting.rst +++ b/docs/source/plotting.rst @@ -22,7 +22,7 @@ solution of the last computation, calling methods with arguments like this:: .. figure:: images/GraphkitLegend.svg :alt: Graphkit Legend :width: 100% - + The legend for all graphkit diagrams, generated by :func:`graphkit.plot.legend()`. 
The same ``plot()`` methods are defined on a :class:`NetworkOperation`, @@ -60,7 +60,10 @@ the last run to inspect it. If you want the bare-bone diagram, simply reset it: Errors & debugging ------------------ -If an operation fails, the original exception gets annotated +Graphs may become arbitrary deep. Launching a debugger-session to inspect +deeply nested stacks is notoriously hard + +As a workaround, when some operation fails, the original exception gets annotated with the folllowing properties, as a debug aid: >>> from graphkit import compose, operation @@ -74,20 +77,27 @@ with the folllowing properties, as a debug aid: ... operation(name="screamer", needs=['a'], provides=["foo"])(scream) ... )({'a': None}) ... except ValueError as ex: -... pprint(ex.graphkit_aid) -{'network': +... pprint(ex.graphkit_jetsam) +{'args': {'args': [None], 'kwargs': {}}, + 'fnouts': ['foo'], + 'network': ... 'operation': FunctionalOperation(name='screamer', needs=['a'], provides=['foo']), - 'operation_args': {'args': [None], 'kwargs': {}}, - 'operation_fnouts': None, - 'operation_outs': None, - 'operation_results': None, + 'outs': None, 'plan': ExecutionPlan(inputs=('a',), outputs=(), steps: +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])), + 'results': None, 'solution': {'a': None}} -The following annotated attributes might have values on an exception ``ex``: +In interactive *REPL* console you may use this to get the last raised exception:: + + import sys + + sys.last_value.graphkit_jetsam + + +The following annotated attributes *might* have meaningfull value on an exception: ``network`` the innermost network owning the failed operation/function diff --git a/graphkit/base.py b/graphkit/base.py index b50a64e6..14b48497 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -1,5 +1,9 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
+import contextlib +import logging +from collections import namedtuple + try: from collections import abc except ImportError: @@ -8,6 +12,101 @@ from . import plot +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def jetsam(locs, annotation="graphkit_jetsam", **keys_to_salvage): + """ + Debug-aid to annotate exceptions with salvaged values from wrapped functions. + + :param locs: + ``locals()`` from the context-manager's block containing vars + to be salvaged in case of exception + + ATTENTION: wrapped function must finally call ``locals()``, because + *locals* dictionary only reflects local-var changes after call. + :param keys_to_salvage: + a mapping of destination-annotation-keys --> source-locals-keys; + if a `source` is callable, the value to salvage is retrieved + by calling ``value(locs)``. + + :raise: + any exception raised by the wrapped function, annotated with values + assigned as atrributes on this context-manager + + - Any attrributes attached on this manager are attached as a new dict on + the raised exception as new ``graphkit_jetsam`` attrribute with a dict as value. + - If the exception is already annotated, any new items are inserted, + but existing ones are preserved. + + **Example:** + + Call it with managed-block's ``locals()`` and tell which of them to salvage + in case of errors:: + + + with jetsam(locals(), a="salvaged_a", foo="missing"): + try: + a = 1 + raise Exception() + finally: + locals() # to update locals-dict handed to jetsam(). + + And then from a REPL:: + + import sys + sys.last_value.graphkit_jetsam + {'salvaged_a': 1, "undefined": None} + + + ** Reason:** + + Graphs may become arbitrary deep. Debugging such graphs is notoriously hard. + + The purpose is not to require a debugger-session to inspect the root-causes + (without precluding one). 
+ + Naively salvaging values with a simple try/except block around each function, + blocks the debugger from landing on the real cause of the error - it would + land on that block; and that could be many nested levels above it. + """ + ## Fail EARLY before yielding on bad use. + # + assert isinstance(locs, dict), ("Bad `locs` given to jetsam`, not a dict:", locs) + assert keys_to_salvage, "No `keys_to_salvage` given to jetsam`!" + assert all(isinstance(v, str) or callable(v) for v in keys_to_salvage.values()), ( + "Bad `keys_to_salvage` given to jetsam`:", + keys_to_salvage, + ) + + try: + yield jetsam + except Exception as ex_to_annotate: + try: + annotations = getattr(ex_to_annotate, annotation, None) + if not isinstance(annotations, dict): + annotations = {} + setattr(ex_to_annotate, annotation, annotations) + + ## Salvage any asked + for dst_key, src in keys_to_salvage.items(): + try: + salvaged_value = src(locs) if callable(src) else locs.get(src) + annotations.setdefault(dst_key, salvaged_value) + except Exception as ex: + log.warning( + "Supressed error while salvaging jetsam item (%r, %r): %r" + % (dst_key, src, ex) + ) + except Exception as ex: + log.warning( + "Supressed error while annotating exception: %r", ex, exc_info=1 + ) + + raise # re-raise without ex-arg, not to insert my frame + + class Data(object): """ This wraps any data that is consumed or produced @@ -99,29 +198,28 @@ def compute(self, inputs): raise NotImplementedError("Define callable of %r!" % self) def _compute(self, named_inputs, outputs=None): - try: - args = [named_inputs[d] for d in self.needs] - results = self.compute(args) - - results = zip(self.provides, results) - - if outputs: - outs = set(outputs) - results = filter(lambda x: x[0] in outs, results) - - return dict(results) - except Exception as ex: - ## Annotate exception with debugging aid on errors. 
- # - locs = locals() - err_aid = getattr(ex, "graphkit_aid", {}) - err_aid.setdefault("operation", self) - err_aid.setdefault("operation_args", locs.get("args")) - err_aid.setdefault("operation_fnouts", locs.get("outputs")) - err_aid.setdefault("operation_outs", locs.get("outputs")) - err_aid.setdefault("operation_results", locs.get("results")) - setattr(ex, "graphkit_aid", err_aid) - raise + with jetsam( + locals(), + operation="self", + outs="outputs", + fnouts="provides", + args="args", + results="results", + ): + try: + provides = self.provides + args = [named_inputs[d] for d in self.needs] + results = self.compute(args) + + results = zip(provides, results) + + if outputs: + outs = set(outputs) + results = filter(lambda x: x[0] in outs, results) + + return dict(results) + finally: + locals() # to update locals-dict handed to jetsam() def _after_init(self): """ diff --git a/graphkit/functional.py b/graphkit/functional.py index b5c22f02..989274ee 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -3,7 +3,7 @@ import networkx as nx from boltons.setutils import IndexedSet as iset -from .base import NetworkOperation, Operation +from .base import jetsam, NetworkOperation, Operation from .modifiers import optional, sideffect from .network import Network @@ -14,57 +14,52 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def _compute(self, named_inputs, outputs=None): - try: - args = [ - named_inputs[n] - for n in self.needs - if not isinstance(n, optional) and not isinstance(n, sideffect) - ] - - # Find any optional inputs in named_inputs. Get only the ones that - # are present there, no extra `None`s. - optionals = { - n: named_inputs[n] - for n in self.needs - if isinstance(n, optional) and n in named_inputs - } - - # Combine params and optionals into one big glob of keyword arguments. - kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} - - # Don't expect sideffect outputs. 
- provides = [n for n in self.provides if not isinstance(n, sideffect)] - - result = self.fn(*args, **kwargs) - - if not provides: - # All outputs were sideffects. - return {} - - if len(provides) == 1: - result = [result] - - result = zip(provides, result) - if outputs: - outputs = set(n for n in outputs if not isinstance(n, sideffect)) - result = filter(lambda x: x[0] in outputs, result) - - return dict(result) - except Exception as ex: - ## Annotate exception with debugging aid on errors. - # - locs = locals() - err_aid = getattr(ex, "graphkit_aid", {}) - err_aid.setdefault("operation", self) - err_aid.setdefault( - "operation_args", - {"args": locs.get("args"), "kwargs": locs.get("kwargs")}, - ) - err_aid.setdefault("operation_fnouts", locs.get("outputs")) - err_aid.setdefault("operation_outs", locs.get("outputs")) - err_aid.setdefault("operation_results", locs.get("results")) - setattr(ex, "graphkit_aid", err_aid) - raise + with jetsam( + locals(), + operation="self", + outs="outputs", + fnouts="provides", + args=lambda locs: {"args": locs.get("args"), "kwargs": locs.get("kwargs")}, + results="results", + ): + try: + args = [ + named_inputs[n] + for n in self.needs + if not isinstance(n, optional) and not isinstance(n, sideffect) + ] + + # Find any optional inputs in named_inputs. Get only the ones that + # are present there, no extra `None`s. + optionals = { + n: named_inputs[n] + for n in self.needs + if isinstance(n, optional) and n in named_inputs + } + + # Combine params and optionals into one big glob of keyword arguments. + kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} + + # Don't expect sideffect outputs. + provides = [n for n in self.provides if not isinstance(n, sideffect)] + + results = self.fn(*args, **kwargs) + + if not provides: + # All outputs were sideffects. 
+ return {} + + if len(provides) == 1: + results = [results] + + results = zip(provides, results) + if outputs: + outputs = set(n for n in outputs if not isinstance(n, sideffect)) + results = filter(lambda x: x[0] in outputs, results) + + return dict(results) + finally: + locals() # to update locals-dict handed to jetsam() def __call__(self, *args, **kwargs): return self.fn(*args, **kwargs) diff --git a/graphkit/network.py b/graphkit/network.py index 797e497d..a94a682b 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -77,7 +77,7 @@ from networkx import DiGraph from . import plot -from .base import Operation +from .base import jetsam, Operation from .modifiers import optional, sideffect log = logging.getLogger(__name__) @@ -257,15 +257,13 @@ def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): solution[value_name] = inputs[value_name] def _call_operation(self, op, solution): - try: - return op._compute(solution) - except Exception as ex: - ## Annotate exception with debugging aid on errors. - # - err_aid = getattr(ex, "graphkit_aid", {}) - err_aid.setdefault("plan", self) - setattr(ex, "graphkit_aid", err_aid) - raise + # Although `plan` have added to jetsam in `compute()``, + # add it again, in case compile()/execute is called separately. + with jetsam(locals(), plan="self"): + try: + return op._compute(solution) + finally: + locals() # to update locals-dict handed to jetsam() def _execute_thread_pool_barrier_method( self, inputs, solution, overwrites, thread_pool_size=10 @@ -710,34 +708,27 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) :returns: a dictionary of output data objects, keyed by name. """ - try: - assert ( - isinstance(outputs, (list, tuple)) or outputs is None - ), "The outputs argument must be a list" - - # Build the execution plan. - self.last_plan = plan = self.compile(named_inputs.keys(), outputs) - - # start with fresh data solution. 
- solution = dict(named_inputs) - - plan.execute(solution, overwrites_collector, method) - - if outputs: - # Filter outputs to just return what's requested. - # Otherwise, return the whole solution as output, - # including input and intermediate data nodes. - # TODO: assert no other outputs exists due to evict-instructions. - solution = dict(i for i in solution.items() if i[0] in outputs) - - return solution - except Exception as ex: - ## Annotate exception with debugging aid on errorrs. - # - locs = locals() - err_aid = getattr(ex, "graphkit_aid", {}) - err_aid.setdefault("network", locs.get("self")) - err_aid.setdefault("plan", locs.get("plan")) - err_aid.setdefault("solution", locs.get("solution")) - setattr(ex, "graphkit_aid", err_aid) - raise + with jetsam(locals(), network="self", plan="plan", solution="solution"): + try: + assert ( + isinstance(outputs, (list, tuple)) or outputs is None + ), "The outputs argument must be a list" + + # Build the execution plan. + self.last_plan = plan = self.compile(named_inputs.keys(), outputs) + + # start with fresh data solution. + solution = dict(named_inputs) + + plan.execute(solution, overwrites_collector, method) + + if outputs: + # Filter outputs to just return what's requested. + # Otherwise, return the whole solution as output, + # including input and intermediate data nodes. + # Still needed with eviction to clean isolated given inputs. + solution = dict(i for i in solution.items() if i[0] in outputs) + + return solution + finally: + locals() # to update locals-dict handed to jetsam() diff --git a/test/test_base.py b/test/test_base.py new file mode 100644 index 00000000..ccde450e --- /dev/null +++ b/test/test_base.py @@ -0,0 +1,118 @@ +# Copyright 2016, Yahoo Inc. +# Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. 
+import logging + +import pytest +import itertools as itt + +from graphkit import base + + +def test_jetsam_without_failure(caplog): + caplog.set_level(logging.INFO) + with pytest.raises(AssertionError, match="No `keys_to_salvage`"): + with base.jetsam({}): + pytest.xfail("Jetsam did not detect bad inputs!") + + assert "No-op jetsam! Call" not in caplog.text + assert "Supressed error" not in caplog.text + + +@pytest.mark.parametrize("locs", [None, (), [], [0], "bad"]) +def test_jetsam_bad_locals(locs, caplog): + caplog.set_level(logging.INFO) + with pytest.raises(AssertionError, match="Bad `locs`") as excinfo: + with base.jetsam(locs, a="a"): + raise Exception() + + assert not hasattr(excinfo.value, "graphkit_jetsam") + assert "Supressed error while annotating exception" not in caplog.text + + +@pytest.mark.parametrize("keys", [{"k": None}, {"k": ()}, {"k": []}, {"k": [0]}]) +def test_jetsam_bad_keys(keys, caplog): + caplog.set_level(logging.INFO) + with pytest.raises(AssertionError, match="Bad `keys_to_salvage`") as excinfo: + with base.jetsam({}, **keys): + raise Exception("ABC") + + assert not hasattr(excinfo.value, "graphkit_jetsam") + assert "Supressed error while annotating exception" not in caplog.text + + +@pytest.mark.parametrize("locs", [None, (), [], [0], "bad"]) +def test_jetsam_bad_locals_given(locs, caplog): + caplog.set_level(logging.INFO) + with pytest.raises(AssertionError, match="`locs` given to jetsam") as excinfo: + with base.jetsam(locs, a="a"): + raise Exception("ABC") + + assert not hasattr(excinfo.value, "graphkit_jetsam") + assert "Supressed error while annotating exception" not in caplog.text + + +@pytest.mark.parametrize("annotation", [None, (), [], [0], "bad"]) +def test_jetsam_bad_existing_annotation(annotation, caplog): + caplog.set_level(logging.INFO) + with pytest.raises(Exception, match="ABC") as excinfo: + with base.jetsam({}, a="a"): + ex = Exception("ABC") + ex.graphkit_jetsam = annotation + raise ex + + assert 
excinfo.value.graphkit_jetsam == {"a": None} + assert "Supressed error while annotating exception" not in caplog.text + + +def test_jetsam_dummy_locals(caplog): + with pytest.raises(Exception, match="ABC") as excinfo: + with base.jetsam({"a": 1}, a="a", bad="bad"): + + raise Exception("ABC") + + assert isinstance(excinfo.value.graphkit_jetsam, dict) + assert excinfo.value.graphkit_jetsam == {"a": 1, "bad": None} + assert "Supressed error" not in caplog.text + + +def _jetsamed_fn(): + b = 1 + with base.jetsam(locals(), a="a", b="b"): + try: + a = 1 + b = 2 + raise Exception("ABC", a, b) + finally: + locals() + + +def test_jetsam_locals_simple(caplog): + with pytest.raises(Exception, match="ABC") as excinfo: + _jetsamed_fn() + assert excinfo.value.graphkit_jetsam == {"a": 1, "b": 2} + assert "Supressed error" not in caplog.text + + +def test_jetsam_nested(): + def inner(): + with base.jetsam(locals(), fn="fn"): + try: + + fn = "inner" + raise Exception("ABC") + finally: + locals() + + def outer(): + with base.jetsam(locals(), fn="fn"): + try: + + fn = "outer" + inner() + finally: + locals() + + with pytest.raises(Exception, match="ABC") as excinfo: + outer() + + assert excinfo.value.graphkit_jetsam == {"fn": "inner"} From d109c9e263d090043043c25829fad79c1d616303 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 03:31:34 +0300 Subject: [PATCH 150/167] FEAT(TCs): check SITE and fail on errors --- .travis.yml | 2 +- test/test_doc.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index eccdd493..f5176cc6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,7 @@ script: if [[ "$TRAVIS_PYTHON_VERSION" = '3.7' ]]; then pytest --cov=graphkit -m 'slow or not slow' else - pytest --cov=graphkit test/ + pytest --cov=graphkit test/ fi deploy: diff --git a/test/test_doc.py b/test/test_doc.py index f54cf76f..0669d350 100644 --- a/test/test_doc.py +++ b/test/test_doc.py @@ -5,10 +5,12 @@ import 
sys +proj_path = osp.join(osp.dirname(__file__), "..") + + def test_README_as_PyPi_landing_page(monkeypatch): from docutils import core as dcore - proj_path = osp.join(osp.dirname(__file__), "..") long_desc = subprocess.check_output( "python setup.py --long-description".split(), cwd=proj_path ) @@ -22,3 +24,9 @@ def test_README_as_PyPi_landing_page(monkeypatch): "halt_level": 2 # 2=WARN, 1=INFO }, ) + + +# @pytest.mark.slow +def test_site(): + # Fail on warnings, but don't rebuild all files (no `-a`), + subprocess.check_call("python setup.py build_sphinx -W".split(), cwd=proj_path) From d80b57e9fcdf5414d9bc30708c1199700b10f323 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 04:45:34 +0300 Subject: [PATCH 151/167] FEAT(TCs): check SITE and fail on errors, but... FAILs due to missing image in composition.rst. --- .travis.yml | 4 +++- test/test_doc.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index f5176cc6..3ea468ab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -29,7 +29,9 @@ script: if [[ "$TRAVIS_PYTHON_VERSION" = '3.7' ]]; then pytest --cov=graphkit -m 'slow or not slow' else - pytest --cov=graphkit test/ + # Undo configs in setup.cfg + echo -e '[pytest]\nmarkers: slow' > pytest.ini + pytest fi deploy: diff --git a/test/test_doc.py b/test/test_doc.py index 0669d350..eb0b9fb1 100644 --- a/test/test_doc.py +++ b/test/test_doc.py @@ -26,7 +26,6 @@ def test_README_as_PyPi_landing_page(monkeypatch): ) -# @pytest.mark.slow def test_site(): # Fail on warnings, but don't rebuild all files (no `-a`), - subprocess.check_call("python setup.py build_sphinx -W".split(), cwd=proj_path) + subprocess.check_output("python setup.py build_sphinx -W".split(), cwd=proj_path) From 5f0200c22619e5d14b7efdae033ea4d7fa19348a Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 12:54:41 +0300 Subject: [PATCH 152/167] enh(travis): coverage also from older pythons --- .travis.yml | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3ea468ab..b8a60fb8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,7 +31,7 @@ script: else # Undo configs in setup.cfg echo -e '[pytest]\nmarkers: slow' > pytest.ini - pytest + pytest --cov=graphkit fi deploy: From 91a11cfd49d87b996b4ed4dd197f772915649803 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 15:19:34 +0300 Subject: [PATCH 153/167] refact(TC): import directly modifiers (shorter code) --- test/test_graphkit.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index aba5dde5..60a8f129 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -9,9 +9,8 @@ import pytest -import graphkit.modifiers as modifiers import graphkit.network as network -from graphkit import Operation, compose, operation +from graphkit import Operation, compose, operation, optional, sideffect from graphkit.network import _EvictInstruction @@ -532,9 +531,9 @@ def test_optional(): def addplusplus(a, b, c=0): return a + b + c - sum_op = operation( - name="sum_op1", needs=["a", "b", modifiers.optional("c")], provides="sum" - )(addplusplus) + sum_op = operation(name="sum_op1", needs=["a", "b", optional("c")], provides="sum")( + addplusplus + ) net = compose(name="test_net")(sum_op) @@ -564,13 +563,13 @@ def increment(box): graph = compose("mygraph")( operation( name="extend", - needs=["box", modifiers.sideffect("a")], - provides=[modifiers.sideffect("b")], + needs=["box", sideffect("a")], + provides=[sideffect("b")], )(extend), operation( name="increment", - needs=["box", modifiers.sideffect("b")], - provides=modifiers.sideffect("c"), + needs=["box", sideffect("b")], + provides=sideffect("c"), )(increment), ) @@ -580,13 +579,13 @@ def increment(box): graph = compose("mygraph")( operation( name="increment", - needs=["box", modifiers.sideffect("a")], - 
provides=modifiers.sideffect("b"), + needs=["box", sideffect("a")], + provides=sideffect("b"), )(increment), operation( name="extend", - needs=["box", modifiers.sideffect("b")], - provides=[modifiers.sideffect("c")], + needs=["box", sideffect("b")], + provides=[sideffect("c")], )(extend), ) @@ -606,7 +605,7 @@ def test_optional_per_function_with_same_output(): add_op = operation(name="add", needs=["a", "b"], provides="a+-b")(add) sub_op_optional = operation( - name="sub_opt", needs=["a", modifiers.optional("b")], provides="a+-b" + name="sub_opt", needs=["a", optional("b")], provides="a+-b" )(lambda a, b=10: a - b) # Normal order @@ -670,7 +669,7 @@ def addplusplus(a, b, c=0): # Here, a _EvictInstruction will be inserted for the optional need 'c'. sum_op1 = operation( - name="sum_op1", needs=["a", "b", modifiers.optional("c")], provides="sum1" + name="sum_op1", needs=["a", "b", optional("c")], provides="sum1" )(addplusplus) sum_op2 = operation(name="sum_op2", needs=["sum1", "sum1"], provides="sum2")(add) net = compose(name="test_net")(sum_op1, sum_op2) @@ -688,11 +687,9 @@ def count_evictions(steps): pipeline = compose(name="pipeline")( operation(name="a free without b", needs=["a"], provides=["aa"])(identity), operation(name="satisfiable", needs=["a", "b"], provides=["ab"])(add), - operation( - name="optional ab", - needs=["aa", modifiers.optional("ab")], - provides=["asked"], - )(lambda a, ab=10: a + ab), + operation(name="optional ab", needs=["aa", optional("ab")], provides=["asked"])( + lambda a, ab=10: a + ab + ), ) inp = {"a": 2, "b": 3} @@ -759,7 +756,7 @@ def fn3(z, k=1): operation(name="b", needs="x", provides="bo")(fn), # this should execute after a and b have finished operation(name="c", needs=["ao", "bo"], provides="co")(fn2), - operation(name="d", needs=["ao", modifiers.optional("k")], provides="do")(fn3), + operation(name="d", needs=["ao", optional("k")], provides="do")(fn3), operation(name="e", needs=["ao", "bo"], provides="eo")(fn2), 
operation(name="f", needs="eo", provides="fo")(fn), operation(name="g", needs="fo", provides="go")(fn), From e4ef2ee7bb7e327ba3a1f131fe10f5cc4433d1b3 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 15:56:56 +0300 Subject: [PATCH 154/167] ENH(netop): accept SINGLE STR as asked-outs ... ... shortcut instead of demanding the usual singular lists. + change some sample calls in the docs. --- docs/source/composition.rst | 10 +++++++--- graphkit/network.py | 13 ++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/source/composition.rst b/docs/source/composition.rst index 73239487..3a2fc4de 100644 --- a/docs/source/composition.rst +++ b/docs/source/composition.rst @@ -60,14 +60,18 @@ For example, if ``graph`` is as defined above, we can run it like this:: Producing a subset of outputs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -By default, calling a graph-operation on a set of inputs will yield all of that graph's outputs. You can use the ``outputs`` parameter to request only a subset. For example, if ``graphop`` is as above:: +By default, calling a graph-operation on a set of inputs will yield all of that graph's outputs. +You can use the ``outputs`` parameter to request only a subset. +For example, if ``graphop`` is as above:: # Run the graph-operation and request a subset of the outputs. - >>> out = graphop({'a': 2, 'b': 5}, outputs=["a_minus_ab"]) + >>> out = graphop({'a': 2, 'b': 5}, outputs="a_minus_ab") >>> out {'a_minus_ab': -8} -When using ``outputs`` to request only a subset of a graph's outputs, GraphKit executes only the ``operation`` nodes in the graph that are on a path from the inputs to the requested outputs. For example, the ``abspow1`` operation will not be executed here. +When using ``outputs`` to request only a subset of a graph's outputs, GraphKit executes +only the ``operation`` nodes in the graph that are on a path from the inputs to the requested outputs. 
+For example, the ``abspow1`` operation will not be executed here. Short-circuiting a graph computation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/graphkit/network.py b/graphkit/network.py index a94a682b..6d5009b3 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -692,8 +692,8 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) you want to populate, and the values are the concrete values you want to set for the data node. - :param list output: - once all necessary computations are complete. + :param outputs: + a string or a list of strings with all data asked to compute. If you set this variable to ``None``, all data nodes will be kept and returned at runtime. @@ -710,9 +710,12 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) """ with jetsam(locals(), network="self", plan="plan", solution="solution"): try: - assert ( - isinstance(outputs, (list, tuple)) or outputs is None - ), "The outputs argument must be a list" + if isinstance(outputs, str): + outputs = [outputs] + elif not isinstance(outputs, (list, tuple)) and outputs is not None: + raise ValueError( + "The outputs argument must be a list, was: %s", outputs + ) # Build the execution plan. self.last_plan = plan = self.compile(named_inputs.keys(), outputs) From 56abdee6ddad3245dc32302ade6db815a8fac557 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 16:08:16 +0300 Subject: [PATCH 155/167] enh(jetsham): allow *vars as 1-1 mappings; rename jetsams; ... + ENH(net):add forgotten `outputs` jetsam earlier in compute(). + refact: Had to rename JETSAMs to take adavntage of 1-1 mappngs. + refact: updated jetsam usage to reduce source-lines. + renamed jetsam(arg-names). 
--- docs/source/plotting.rst | 19 +++++++------- graphkit/base.py | 54 +++++++++++++++++++++++----------------- graphkit/functional.py | 6 ++--- graphkit/network.py | 4 +-- test/test_base.py | 6 ++--- 5 files changed, 49 insertions(+), 40 deletions(-) diff --git a/docs/source/plotting.rst b/docs/source/plotting.rst index a5981c6e..b4dd9b67 100644 --- a/docs/source/plotting.rst +++ b/docs/source/plotting.rst @@ -79,13 +79,13 @@ with the folllowing properties, as a debug aid: ... except ValueError as ex: ... pprint(ex.graphkit_jetsam) {'args': {'args': [None], 'kwargs': {}}, - 'fnouts': ['foo'], 'network': ... 'operation': FunctionalOperation(name='screamer', needs=['a'], provides=['foo']), - 'outs': None, + 'outputs': None, 'plan': ExecutionPlan(inputs=('a',), outputs=(), steps: +--FunctionalOperation(name='screamer', needs=['a'], provides=['foo'])), + 'provides': ['foo'], 'results': None, 'solution': {'a': None}} @@ -108,17 +108,18 @@ The following annotated attributes *might* have meaningfull value on an exceptio ``operation`` the innermost operation that failed -``operation_args`` - either a 2-tuple ``(args, kwargs)`` or just the ``args`` fed to the operation +``args`` + either the input arguments list fed into the function, or a dict with + both ``args`` & ``kwargs`` keys in it. -``operation_fnouts`` +``outputs`` the names of the outputs the function was expected to return -``operation_outs`` - the names eventually the graph needed from the operation - (a subset of the above) +``provides`` + the names eventually the graph needed from the operation; + a subset of the above, and not always what has been declared in the operation. -``operation_results`` +``results`` the values dict, if any; it maybe a *zip* of the provides with the actual returned values of the function, ot the raw results. 
diff --git a/graphkit/base.py b/graphkit/base.py index 14b48497..5ec79ce1 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -16,28 +16,31 @@ @contextlib.contextmanager -def jetsam(locs, annotation="graphkit_jetsam", **keys_to_salvage): +def jetsam(locs, *salvage_vars, annotation="graphkit_jetsam", **salvage_mappings): """ Debug-aid to annotate exceptions with salvaged values from wrapped functions. - + :param locs: ``locals()`` from the context-manager's block containing vars to be salvaged in case of exception - + ATTENTION: wrapped function must finally call ``locals()``, because *locals* dictionary only reflects local-var changes after call. - :param keys_to_salvage: + :param salvage_vars: + local variable names to save as is in the salvaged annotations dictionary. + :param salvage_mappings: a mapping of destination-annotation-keys --> source-locals-keys; if a `source` is callable, the value to salvage is retrieved by calling ``value(locs)``. - + They take precendance over`salvae_vars`. + :raise: any exception raised by the wrapped function, annotated with values assigned as atrributes on this context-manager - Any attrributes attached on this manager are attached as a new dict on the raised exception as new ``graphkit_jetsam`` attrribute with a dict as value. - - If the exception is already annotated, any new items are inserted, + - If the exception is already annotated, any new items are inserted, but existing ones are preserved. **Example:** @@ -46,9 +49,10 @@ def jetsam(locs, annotation="graphkit_jetsam", **keys_to_salvage): in case of errors:: - with jetsam(locals(), a="salvaged_a", foo="missing"): + with jetsam(locals(), "a", b="salvaged_b", c_var="c"): try: a = 1 + b = 2 raise Exception() finally: locals() # to update locals-dict handed to jetsam(). 
@@ -57,13 +61,13 @@ def jetsam(locs, annotation="graphkit_jetsam", **keys_to_salvage): import sys sys.last_value.graphkit_jetsam - {'salvaged_a': 1, "undefined": None} + {'a': 1, 'salvaged_b': 2, "c_var": None} ** Reason:** - + Graphs may become arbitrary deep. Debugging such graphs is notoriously hard. - + The purpose is not to require a debugger-session to inspect the root-causes (without precluding one). @@ -73,13 +77,22 @@ def jetsam(locs, annotation="graphkit_jetsam", **keys_to_salvage): """ ## Fail EARLY before yielding on bad use. # - assert isinstance(locs, dict), ("Bad `locs` given to jetsam`, not a dict:", locs) - assert keys_to_salvage, "No `keys_to_salvage` given to jetsam`!" - assert all(isinstance(v, str) or callable(v) for v in keys_to_salvage.values()), ( - "Bad `keys_to_salvage` given to jetsam`:", - keys_to_salvage, + assert isinstance(locs, dict), ("Bad `locs`, not a dict:", locs) + assert all(isinstance(i, str) for i in salvage_vars), ( + "Bad `salvage_vars`!", + salvage_vars, + ) + assert salvage_mappings, "No `salvage_mappings` given!" + assert all(isinstance(v, str) or callable(v) for v in salvage_mappings.values()), ( + "Bad `salvage_mappings`:", + salvage_mappings, ) + ## Merge vars-mapping to save. 
+ for var in salvage_vars: + if var not in salvage_mappings: + salvage_mappings[var] = var + try: yield jetsam except Exception as ex_to_annotate: @@ -89,8 +102,8 @@ def jetsam(locs, annotation="graphkit_jetsam", **keys_to_salvage): annotations = {} setattr(ex_to_annotate, annotation, annotations) - ## Salvage any asked - for dst_key, src in keys_to_salvage.items(): + ## Salvage those asked + for dst_key, src in salvage_mappings.items(): try: salvaged_value = src(locs) if callable(src) else locs.get(src) annotations.setdefault(dst_key, salvaged_value) @@ -199,12 +212,7 @@ def compute(self, inputs): def _compute(self, named_inputs, outputs=None): with jetsam( - locals(), - operation="self", - outs="outputs", - fnouts="provides", - args="args", - results="results", + locals(), "outputs", "provides", "args", "results", operation="self" ): try: provides = self.provides diff --git a/graphkit/functional.py b/graphkit/functional.py index 989274ee..95926670 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -16,11 +16,11 @@ def __init__(self, **kwargs): def _compute(self, named_inputs, outputs=None): with jetsam( locals(), + "outputs", + "provides", + "results", operation="self", - outs="outputs", - fnouts="provides", args=lambda locs: {"args": locs.get("args"), "kwargs": locs.get("kwargs")}, - results="results", ): try: args = [ diff --git a/graphkit/network.py b/graphkit/network.py index 6d5009b3..64e056ad 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -580,7 +580,7 @@ def _build_execution_steps(self, dag, inputs, outputs): outp-names to decide whether to add (and which) evict-instructions Instances of :class:`_EvictInstructions` are inserted in `steps` between - operation nodes to reduce the memory footprint of solutions while + operation nodes to reduce the memory footprint of solutions while the computation is running. An evict-instruction is inserted whenever a *need* is not used by any other *operation* further down the DAG. 
@@ -708,7 +708,7 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) :returns: a dictionary of output data objects, keyed by name. """ - with jetsam(locals(), network="self", plan="plan", solution="solution"): + with jetsam(locals(), "plan", "solution", "outputs", network="self"): try: if isinstance(outputs, str): outputs = [outputs] diff --git a/test/test_base.py b/test/test_base.py index ccde450e..00df0ebb 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -10,7 +10,7 @@ def test_jetsam_without_failure(caplog): caplog.set_level(logging.INFO) - with pytest.raises(AssertionError, match="No `keys_to_salvage`"): + with pytest.raises(AssertionError, match="No `salvage_mappings`"): with base.jetsam({}): pytest.xfail("Jetsam did not detect bad inputs!") @@ -32,7 +32,7 @@ def test_jetsam_bad_locals(locs, caplog): @pytest.mark.parametrize("keys", [{"k": None}, {"k": ()}, {"k": []}, {"k": [0]}]) def test_jetsam_bad_keys(keys, caplog): caplog.set_level(logging.INFO) - with pytest.raises(AssertionError, match="Bad `keys_to_salvage`") as excinfo: + with pytest.raises(AssertionError, match="Bad `salvage_mappings`") as excinfo: with base.jetsam({}, **keys): raise Exception("ABC") @@ -43,7 +43,7 @@ def test_jetsam_bad_keys(keys, caplog): @pytest.mark.parametrize("locs", [None, (), [], [0], "bad"]) def test_jetsam_bad_locals_given(locs, caplog): caplog.set_level(logging.INFO) - with pytest.raises(AssertionError, match="`locs` given to jetsam") as excinfo: + with pytest.raises(AssertionError, match="Bad `locs`") as excinfo: with base.jetsam(locs, a="a"): raise Exception("ABC") From 9a3e02630fa6ce4aa52f1930694fbb9474fe84b1 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 14 Oct 2019 08:02:17 +0300 Subject: [PATCH 156/167] fix(jetsam): PY2 **kw syntax --- graphkit/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/graphkit/base.py b/graphkit/base.py index 5ec79ce1..af918d15 100644 --- 
a/graphkit/base.py +++ b/graphkit/base.py @@ -16,7 +16,8 @@ @contextlib.contextmanager -def jetsam(locs, *salvage_vars, annotation="graphkit_jetsam", **salvage_mappings): +## def jetsam(locs, *salvage_vars, annotation="graphkit_jetsam", **salvage_mappings): # bad PY2 syntax +def jetsam(locs, *salvage_vars, **salvage_mappings): """ Debug-aid to annotate exceptions with salvaged values from wrapped functions. @@ -77,6 +78,8 @@ def jetsam(locs, *salvage_vars, annotation="graphkit_jetsam", **salvage_mappings """ ## Fail EARLY before yielding on bad use. # + annotation = salvage_mappings.pop("annotation", "graphkit_jetsam") + assert isinstance(locs, dict), ("Bad `locs`, not a dict:", locs) assert all(isinstance(i, str) for i in salvage_vars), ( "Bad `salvage_vars`!", From 2f651d07df7af68a8e9fc50a2de489041672cbf6 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 16:23:20 +0300 Subject: [PATCH 157/167] test(net): check compute()-msg when bad outs asked --- test/test_graphkit.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 60a8f129..15e47557 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -107,8 +107,10 @@ def pow_op1(a, exponent=2): exp = {"sum_ab_times_b": 2} assert net({"sum_ab": 1, "b": 2}, outputs=["sum_ab_times_b"]) == exp - # visualize network graph - # net.plot(show=True) + with pytest.raises(ValueError, match="Unknown output node"): + net({"sum_ab": 1, "b": 2}, outputs="bad_node") + with pytest.raises(ValueError, match="Unknown output node"): + net({"sum_ab": 1, "b": 2}, outputs=["b", "bad_node"]) def test_network_simple_merge(): From f44b8cd4755ea333c47f245e4e137a22b0347b81 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 16:36:09 +0300 Subject: [PATCH 158/167] FIX(net.sideffects): typo in add_op() were mixing needs/provides ... 
+ refact(net): use same loop-var in add_op(), to avoid copy-paste mistakes, like above. --- graphkit/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 64e056ad..090903be 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -445,11 +445,11 @@ def add_op(self, operation): self.graph.add_edge(_DataNode(n), operation, **kw) # add nodes and edges to graph describing what this layer provides - for p in operation.provides: + for n in operation.provides: kw = {} if isinstance(n, sideffect): kw["sideffect"] = True - self.graph.add_edge(operation, _DataNode(p), **kw) + self.graph.add_edge(operation, _DataNode(n), **kw) def _collect_unsatisfied_operations(self, dag, inputs): """ From f45bc48c7c188b6a19b6859a11cc57b8200b2a0b Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Sun, 13 Oct 2019 16:36:56 +0300 Subject: [PATCH 159/167] enh(net): +repr() listing graph-nodes --- graphkit/network.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/graphkit/network.py b/graphkit/network.py index 090903be..8f15c6eb 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -409,6 +409,10 @@ def __init__(self, **kwargs): #: (not ``compile()``!), for debugging purposes. self.last_plan = None + def __repr__(self): + steps = ["\n +--%s" % s for s in self.graph.nodes] + return "Network(%s)" % "".join(steps) + def _build_pydot(self, **kws): from .plot import build_pydot From 75f3eb2deedad5c802784a0ec5e15a340e2d582f Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 14 Oct 2019 07:35:35 +0300 Subject: [PATCH 160/167] FIX(DAG): broken_dag had PLAIN-STR instead of DataNode... bc subgraph was taken on plain string outputs. + minor upd err-msg terminology. 
--- graphkit/network.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 8f15c6eb..73b768ec 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -537,7 +537,7 @@ def _prune_graph(self, outputs, inputs): unknown_outputs = iset(outputs) - dag.nodes if unknown_outputs: raise ValueError( - "Unknown output node(s) requested: %s" % ", ".join(unknown_outputs) + "Unknown output node(s) asked: %s" % ", ".join(unknown_outputs) ) broken_dag = dag.copy() # preserve net's graph @@ -560,15 +560,20 @@ def _prune_graph(self, outputs, inputs): # If caller requested specific outputs, we can prune any # unrelated nodes further up the dag. ending_in_outputs = set() - for input_name in outputs: - ending_in_outputs.update(nx.ancestors(dag, input_name)) - broken_dag = broken_dag.subgraph(ending_in_outputs | set(outputs)) + for output_name in outputs: + ending_in_outputs.add(_DataNode(output_name)) + ending_in_outputs.update(nx.ancestors(dag, output_name)) + broken_dag = broken_dag.subgraph(ending_in_outputs) # Prune unsatisfied operations (those with partial inputs or no outputs). unsatisfied = self._collect_unsatisfied_operations(broken_dag, inputs) # Clone it so that it is picklable. pruned_dag = dag.subgraph(broken_dag.nodes - unsatisfied).copy() + assert all( + isinstance(n, (Operation, _DataNode)) for n in pruned_dag + ), pruned_dag + return pruned_dag, broken_edges def _build_execution_steps(self, dag, inputs, outputs): From 804b0c14182eab54aca9b395c5f7025c2b76a358 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 14 Oct 2019 08:22:10 +0300 Subject: [PATCH 161/167] ENH(sideffects): DIFFER from regular DATA... to allow the same name to be used as regular and sideffect data node. + ENH: sideffect-strclass now contains its class-name. + DOC: a lot, comply with docstrings 1-liners; changes also on optionals. + ENH(TCs): +x2 elaborate TCs, check SEs differ from data. 
--- graphkit/modifiers.py | 67 ++++++++++++++++++------- test/test_graphkit.py | 113 ++++++++++++++++++++++++++++++------------ 2 files changed, 129 insertions(+), 51 deletions(-) diff --git a/graphkit/modifiers.py b/graphkit/modifiers.py index e38c2615..6ef6f50a 100644 --- a/graphkit/modifiers.py +++ b/graphkit/modifiers.py @@ -11,12 +11,14 @@ class optional(str): """ - Input values in ``needs`` may be designated as optional using this modifier. - If this modifier is applied to an input value, that value will be input to - the ``operation`` if it is available. The function underlying the - ``operation`` should have a parameter with the same name as the input value - in ``needs``, and the input value will be passed as a keyword argument if - it is available. + An optional need signifies that the function's argument may not receive a value. + + Only input values in ``needs`` may be designated as optional using this modifier. + An ``operation`` will receive a value for an optional need only if if it is available + in the graph at the time of its invocation. + The ``operation``'s function should have a defaulted parameter with the same name + as the opetional, and the input value will be passed as a keyword argument, + if it is available. Here is an example of an operation that uses an optional argument:: @@ -34,7 +36,7 @@ class optional(str): NetworkOperation(name='mygraph', needs=[optional('a'), optional('b'), optional('c')], provides=['sum']) - + >>> # The graph works with and without 'c' provided as input. >>> graph({'a': 5, 'b': 2, 'c': 4})['sum'] 11 @@ -51,22 +53,25 @@ def __repr__(self): class sideffect(str): """ - Inputs & outputs in ``needs`` & ``provides`` may be designated as *sideffects* - using this modifier. *Tokens* work as usual while solving the DAG but - they are never assigned any values to/from the ``operation`` functions. - Specifically: + A sideffect data-dependency participates in the graph but never given/asked in functions. 
+ + Both inputs & outputs in ``needs`` & ``provides`` may be designated as *sideffects* + using this modifier. *Sideffects* work as usual while solving the graph but + they do not interact with the ``operation``'s function; specifically: - input sideffects are NOT fed into the function; - output sideffects are NOT expected from the function. - Their purpose is to describe functions that have modify internal state - their arguments ("side-effects"). - Note that an ``operation`` with just a single *sideffect* output return - no value at all, but it would still be called for its side-effects only. + .. info: + an ``operation`` with just a single *sideffect* output return no value at all, + but it would still be called for its side-effect only. + Their purpose is to describe operations that modify the internal state of + some of their arguments ("side-effects"). A typical use case is to signify columns required to produce new ones in pandas dataframes:: + >>> from graphkit import operation, compose, sideffect >>> # Function appending a new dataframe column from two pre-existing ones. @@ -81,16 +86,42 @@ class sideffect(str): ... provides=[sideffect('sum')])(addcolumns) ... ) >>> graph - NetworkOperation(name='mygraph', needs=[optional('df'), optional('a'), optional('b')], provides=[sideffect('sum')]) + NetworkOperation(name='mygraph', needs=[optional('df'), optional('sideffect(a)'), optional('sideffect(b)')], provides=['sideffect(sum)']) >>> # The graph works with and without 'c' provided as input. >>> df = pd.DataFrame({'a': [5], 'b': [2]}) # doctest: +SKIP >>> graph({'df': df})['sum'] == 11 # doctest: +SKIP True + Note that regular data in *needs* and *provides* do not match same-named *sideffects*. + That is, in the following operation, the ``prices`` input is different from + the ``sideffect(prices)`` output: + + >>> def upd_prices(sales_df, prices): + ... sales_df["Prices"] = prices + + >>> operation(fn=upd_prices, + ... name="upd_prices", + ... 
needs=["sales_df", "price"], + ... provides=[sideffect("price")]) + operation(name='upd_prices', needs=['sales_df', 'price'], provides=['sideffect(price)'], fn=upd_prices) + + .. note:: + An ``operation`` with *sideffects* outputs only, have functions that return + no value at all (like the one above). Such operation would still be called for + their side-effects. + + .. tip:: + You may associate sideffects with other data to convey their relationships, + simply by including their names in the string - in the end, it's just a string - + but no enforcement will happen from *graphkit*. + + >>> sideffect("price[sales_df]") + 'sideffect(price[sales_df])' + """ __slots__ = () # avoid __dict__ on instances - def __repr__(self): - return "sideffect('%s')" % self + def __new__(cls, name): + return super(sideffect, cls).__new__(cls, "sideffect(%s)" % name) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 15e47557..065930b6 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -552,46 +552,93 @@ def addplusplus(a, b, c=0): assert results["sum"] == sum(named_inputs.values()) -def test_sideffects(): - # Function without return value. - def extend(box): - box.extend([1, 2]) +# Function without return value. +def _box_extend(box, *args): + box.extend([1, 2]) - def increment(box): - for i in range(len(box)): - box[i] += 1 - # Designate `a`, `b` as sideffect inp/out arguments. - graph = compose("mygraph")( - operation( - name="extend", - needs=["box", sideffect("a")], - provides=[sideffect("b")], - )(extend), - operation( - name="increment", - needs=["box", sideffect("b")], - provides=sideffect("c"), - )(increment), - ) +def _box_increment(box): + for i in range(len(box)): + box[i] += 1 + - assert graph({"box": [0], "a": True})["box"] == [1, 2, 3] +@pytest.mark.parametrize("bools", range(4)) +def test_sideffect_no_real_data(bools): + reverse = bools >> 0 & 1 + parallel = bools >> 1 & 1 - # Reverse order of functions. 
- graph = compose("mygraph")( + ops = [ operation( - name="increment", - needs=["box", sideffect("a")], - provides=sideffect("b"), - )(increment), + name="extend", needs=["box", sideffect("a")], provides=[sideffect("b")] + )(_box_extend), operation( - name="extend", - needs=["box", sideffect("b")], - provides=[sideffect("c")], - )(extend), - ) + name="increment", needs=["box", sideffect("b")], provides=sideffect("c") + )(_box_increment), + ] + if reverse: + ops = reversed(ops) + # Designate `a`, `b` as sideffect inp/out arguments. + graph = compose("mygraph")(*ops) + if parallel: + graph.set_execution_method("parallel") + + # Normal data must not match sideffects + with pytest.raises(ValueError, match="Unknown output node"): + graph({"box": [0], "a": True}, outputs=["a"]) + with pytest.raises(ValueError, match="Unknown output node"): + graph({"box": [0], "a": True}, outputs=["b"]) + + sol = graph({"box": [0], "a": True}) + # Nothing run if no sideffect inputs given. + assert not graph.net.last_plan.executed + assert sol == {"box": [0], "a": True} + + # Nothing run if no sideffect inputs given. 
+ sol = graph({"box": [0], "a": True}, outputs=["box", sideffect("b")]) + assert not graph.net.last_plan.executed + assert sol == {"box": [0]} + + ## OK INPUT SIDEFFECTS + # + # ok, no asked out + sol = graph({"box": [0], sideffect("a"): True}) + assert sol == {"box": [1, 2, 3], sideffect("a"): True} + # + # bad, not asked the out-sideffect + sol = graph({"box": [0], sideffect("a"): True}, "box") + assert sol == {"box": [0]} + # + # ok, asked the 1st out-sideffect + sol = graph({"box": [0], sideffect("a"): True}, ["box", sideffect("b")]) + assert sol == {"box": [0, 1, 2]} + # + # ok, asked the 2nd out-sideffect + sol = graph({"box": [0], sideffect("a"): True}, ["box", sideffect("c")]) + assert sol == {"box": [1, 2, 3]} + + +@pytest.mark.parametrize("bools", range(4)) +def test_sideffect_real_input(bools): + reverse = bools >> 0 & 1 + parallel = bools >> 1 & 1 + + ops = [ + operation(name="extend", needs=["box", "a"], provides=[sideffect("b")])( + _box_extend + ), + operation(name="increment", needs=["box", sideffect("b")], provides="c")( + _box_increment + ), + ] + if reverse: + ops = reversed(ops) + # Designate `a`, `b` as sideffect inp/out arguments. 
+ graph = compose("mygraph")(*ops) + if parallel: + graph.set_execution_method("parallel") - assert graph({"box": [0], "a": None})["box"] == [1, 1, 2] + assert graph({"box": [0], "a": True}) == {"a": True, "box": [1, 2, 3], "c": None} + assert graph({"box": [0], "a": True}, ["box", "c"]) == {"box": [1, 2, 3], "c": None} @pytest.mark.xfail( From 0e2ada182065330564280de49b3809ddd1388bc5 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 14 Oct 2019 17:38:12 +0300 Subject: [PATCH 162/167] test(jetsam): check actual methods using it --- test/test_base.py | 94 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 7 deletions(-) diff --git a/test/test_base.py b/test/test_base.py index 00df0ebb..8b0419fa 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -1,11 +1,12 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. +import functools as fnt +import itertools as itt import logging import pytest -import itertools as itt -from graphkit import base +from graphkit import base, network, operation def test_jetsam_without_failure(caplog): @@ -75,13 +76,17 @@ def test_jetsam_dummy_locals(caplog): assert "Supressed error" not in caplog.text -def _jetsamed_fn(): +def _scream(*args, **kwargs): + raise Exception("ABC") + + +def _jetsamed_fn(*args, **kwargs): b = 1 with base.jetsam(locals(), a="a", b="b"): try: a = 1 b = 2 - raise Exception("ABC", a, b) + _scream() finally: locals() @@ -97,9 +102,9 @@ def test_jetsam_nested(): def inner(): with base.jetsam(locals(), fn="fn"): try: - + a = 0 fn = "inner" - raise Exception("ABC") + _jetsamed_fn() finally: locals() @@ -108,6 +113,7 @@ def outer(): try: fn = "outer" + b = 0 inner() finally: locals() @@ -115,4 +121,78 @@ def outer(): with pytest.raises(Exception, match="ABC") as excinfo: outer() - assert excinfo.value.graphkit_jetsam == {"fn": "inner"} + assert excinfo.value.graphkit_jetsam 
== {"fn": "inner", "a": 1, "b": 2} + + +def screaming_dumy_op(): + # No jetsam, in particular, to check sites. + class Op: + _compute = _scream + + return Op() + + +@pytest.mark.parametrize( + "acallable, expected_jetsam", + [ + # NO old-stuff Operation(fn=_jetsamed_fn, name="test", needs="['a']", provides=[]), + ( + fnt.partial( + operation(name="test", needs=["a"], provides=["b"])(_scream)._compute, + named_inputs={"a": 1}, + ), + "outputs provides results operation args".split(), + ), + ( + fnt.partial( + network.ExecutionPlan(*([None] * 7))._call_operation, + op=screaming_dumy_op(), + solution={}, + ), + ["plan"], + ), + # Not easy to test Network calling a screaming func (see next TC). + ], +) +def test_jetsam_sites_screaming_func(acallable, expected_jetsam): + # Check jetsams when the underlying function fails. + with pytest.raises(Exception, match="ABC") as excinfo: + acallable() + + ex = excinfo.value + assert set(ex.graphkit_jetsam.keys()) == set(expected_jetsam) + +@pytest.mark.parametrize( + "acallable, expected_jetsam", + [ + # NO old-stuff Operation(fn=_jetsamed_fn, name="test", needs="['a']", provides=[]), + ( + fnt.partial( + operation(name="test", needs=["a"], provides=["b"])(_scream)._compute, + named_inputs=None, + ), + "outputs provides results operation args".split(), + ), + ( + fnt.partial( + network.ExecutionPlan(*([None] * 7))._call_operation, + op=None, + solution={}, + ), + ["plan"], + ), + ( + fnt.partial( + network.Network().compute, named_inputs=None, outputs=None + ), + "network plan solution outputs".split(), + ), + ], +) +def test_jetsam_sites_scream(acallable, expected_jetsam): + # Check jetsams when the site fails. 
+ with pytest.raises(Exception) as excinfo: + acallable() + + ex = excinfo.value + assert set(ex.graphkit_jetsam.keys()) == set(expected_jetsam) From 60759e6d95ae623a85de070edf2e0c9f31b101ef Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Mon, 14 Oct 2019 18:11:10 +0300 Subject: [PATCH 163/167] refact(jetsam): simpler API no contextlib... but cannot etablish call correctnes, relying on recetn TCs. --- graphkit/base.py | 97 ++++++++++++++++++++---------------------- graphkit/functional.py | 95 +++++++++++++++++++++-------------------- graphkit/network.py | 62 +++++++++++++-------------- test/test_base.py | 75 +++++++++++++++----------------- 4 files changed, 161 insertions(+), 168 deletions(-) diff --git a/graphkit/base.py b/graphkit/base.py index af918d15..2e80275a 100644 --- a/graphkit/base.py +++ b/graphkit/base.py @@ -1,6 +1,5 @@ # Copyright 2016, Yahoo Inc. # Licensed under the terms of the Apache License, Version 2.0. See the LICENSE file associated with the project for terms. -import contextlib import logging from collections import namedtuple @@ -15,18 +14,22 @@ log = logging.getLogger(__name__) -@contextlib.contextmanager -## def jetsam(locs, *salvage_vars, annotation="graphkit_jetsam", **salvage_mappings): # bad PY2 syntax -def jetsam(locs, *salvage_vars, **salvage_mappings): +## def jetsam(ex, locs, *salvage_vars, annotation="graphkit_jetsam", **salvage_mappings): # bad PY2 syntax +def jetsam(ex, locs, *salvage_vars, **salvage_mappings): """ - Debug-aid to annotate exceptions with salvaged values from wrapped functions. + Annotate exception with salvaged values from locals(). + :param ex: + the exception to annotate :param locs: ``locals()`` from the context-manager's block containing vars to be salvaged in case of exception ATTENTION: wrapped function must finally call ``locals()``, because *locals* dictionary only reflects local-var changes after call. 
+ :param str annotation: + (a kwarg not seen in the signature due to PY2 compatibility) + the name of the attribute to attach on the exception :param salvage_vars: local variable names to save as is in the salvaged annotations dictionary. :param salvage_mappings: @@ -50,13 +53,12 @@ def jetsam(locs, *salvage_vars, **salvage_mappings): in case of errors:: - with jetsam(locals(), "a", b="salvaged_b", c_var="c"): - try: - a = 1 - b = 2 - raise Exception() - finally: - locals() # to update locals-dict handed to jetsam(). + try: + a = 1 + b = 2 + raise Exception() + exception Exception as ex: + jetsam(ex, locals(), "a", b="salvaged_b", c_var="c") And then from a REPL:: @@ -64,7 +66,6 @@ def jetsam(locs, *salvage_vars, **salvage_mappings): sys.last_value.graphkit_jetsam {'a': 1, 'salvaged_b': 2, "c_var": None} - ** Reason:** Graphs may become arbitrary deep. Debugging such graphs is notoriously hard. @@ -80,6 +81,7 @@ def jetsam(locs, *salvage_vars, **salvage_mappings): # annotation = salvage_mappings.pop("annotation", "graphkit_jetsam") + assert isinstance(ex, Exception), ("Bad `ex`, not an exception dict:", ex) assert isinstance(locs, dict), ("Bad `locs`, not a dict:", locs) assert all(isinstance(i, str) for i in salvage_vars), ( "Bad `salvage_vars`!", @@ -97,30 +99,26 @@ def jetsam(locs, *salvage_vars, **salvage_mappings): salvage_mappings[var] = var try: - yield jetsam - except Exception as ex_to_annotate: - try: - annotations = getattr(ex_to_annotate, annotation, None) - if not isinstance(annotations, dict): - annotations = {} - setattr(ex_to_annotate, annotation, annotations) - - ## Salvage those asked - for dst_key, src in salvage_mappings.items(): - try: - salvaged_value = src(locs) if callable(src) else locs.get(src) - annotations.setdefault(dst_key, salvaged_value) - except Exception as ex: - log.warning( - "Supressed error while salvaging jetsam item (%r, %r): %r" - % (dst_key, src, ex) - ) - except Exception as ex: - log.warning( - "Supressed error while 
annotating exception: %r", ex, exc_info=1 - ) + annotations = getattr(ex, annotation, None) + if not isinstance(annotations, dict): + annotations = {} + setattr(ex, annotation, annotations) - raise # re-raise without ex-arg, not to insert my frame + ## Salvage those asked + for dst_key, src in salvage_mappings.items(): + try: + salvaged_value = src(locs) if callable(src) else locs.get(src) + annotations.setdefault(dst_key, salvaged_value) + except Exception as ex: + log.warning( + "Supressed error while salvaging jetsam item (%r, %r): %r" + % (dst_key, src, ex) + ) + except Exception as ex2: + log.warning("Supressed error while annotating exception: %r", ex2, exc_info=1) + raise ex2 + + raise # noqa #re-raise without ex-arg, not to insert my frame class Data(object): @@ -214,23 +212,22 @@ def compute(self, inputs): raise NotImplementedError("Define callable of %r!" % self) def _compute(self, named_inputs, outputs=None): - with jetsam( - locals(), "outputs", "provides", "args", "results", operation="self" - ): - try: - provides = self.provides - args = [named_inputs[d] for d in self.needs] - results = self.compute(args) + try: + provides = self.provides + args = [named_inputs[d] for d in self.needs] + results = self.compute(args) - results = zip(provides, results) + results = zip(provides, results) - if outputs: - outs = set(outputs) - results = filter(lambda x: x[0] in outs, results) + if outputs: + outs = set(outputs) + results = filter(lambda x: x[0] in outs, results) - return dict(results) - finally: - locals() # to update locals-dict handed to jetsam() + return dict(results) + except Exception as ex: + jetsam( + ex, locals(), "outputs", "provides", "args", "results", operation="self" + ) def _after_init(self): """ diff --git a/graphkit/functional.py b/graphkit/functional.py index 95926670..4013fddd 100644 --- a/graphkit/functional.py +++ b/graphkit/functional.py @@ -14,52 +14,55 @@ def __init__(self, **kwargs): Operation.__init__(self, **kwargs) def 
_compute(self, named_inputs, outputs=None): - with jetsam( - locals(), - "outputs", - "provides", - "results", - operation="self", - args=lambda locs: {"args": locs.get("args"), "kwargs": locs.get("kwargs")}, - ): - try: - args = [ - named_inputs[n] - for n in self.needs - if not isinstance(n, optional) and not isinstance(n, sideffect) - ] - - # Find any optional inputs in named_inputs. Get only the ones that - # are present there, no extra `None`s. - optionals = { - n: named_inputs[n] - for n in self.needs - if isinstance(n, optional) and n in named_inputs - } - - # Combine params and optionals into one big glob of keyword arguments. - kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} - - # Don't expect sideffect outputs. - provides = [n for n in self.provides if not isinstance(n, sideffect)] - - results = self.fn(*args, **kwargs) - - if not provides: - # All outputs were sideffects. - return {} - - if len(provides) == 1: - results = [results] - - results = zip(provides, results) - if outputs: - outputs = set(n for n in outputs if not isinstance(n, sideffect)) - results = filter(lambda x: x[0] in outputs, results) - - return dict(results) - finally: - locals() # to update locals-dict handed to jetsam() + try: + args = [ + named_inputs[n] + for n in self.needs + if not isinstance(n, optional) and not isinstance(n, sideffect) + ] + + # Find any optional inputs in named_inputs. Get only the ones that + # are present there, no extra `None`s. + optionals = { + n: named_inputs[n] + for n in self.needs + if isinstance(n, optional) and n in named_inputs + } + + # Combine params and optionals into one big glob of keyword arguments. + kwargs = {k: v for d in (self.params, optionals) for k, v in d.items()} + + # Don't expect sideffect outputs. + provides = [n for n in self.provides if not isinstance(n, sideffect)] + + results = self.fn(*args, **kwargs) + + if not provides: + # All outputs were sideffects. 
+ return {} + + if len(provides) == 1: + results = [results] + + results = zip(provides, results) + if outputs: + outputs = set(n for n in outputs if not isinstance(n, sideffect)) + results = filter(lambda x: x[0] in outputs, results) + + return dict(results) + except Exception as ex: + jetsam( + ex, + locals(), + "outputs", + "provides", + "results", + operation="self", + args=lambda locs: { + "args": locs.get("args"), + "kwargs": locs.get("kwargs"), + }, + ) def __call__(self, *args, **kwargs): return self.fn(*args, **kwargs) diff --git a/graphkit/network.py b/graphkit/network.py index 73b768ec..eed0eaa4 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -259,11 +259,10 @@ def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): def _call_operation(self, op, solution): # Although `plan` have added to jetsam in `compute()``, # add it again, in case compile()/execute is called separately. - with jetsam(locals(), plan="self"): - try: - return op._compute(solution) - finally: - locals() # to update locals-dict handed to jetsam() + try: + return op._compute(solution) + except Exception as ex: + jetsam(ex, locals(), plan="self") def _execute_thread_pool_barrier_method( self, inputs, solution, overwrites, thread_pool_size=10 @@ -717,30 +716,29 @@ def compute(self, named_inputs, outputs, method=None, overwrites_collector=None) :returns: a dictionary of output data objects, keyed by name. """ - with jetsam(locals(), "plan", "solution", "outputs", network="self"): - try: - if isinstance(outputs, str): - outputs = [outputs] - elif not isinstance(outputs, (list, tuple)) and outputs is not None: - raise ValueError( - "The outputs argument must be a list, was: %s", outputs - ) - - # Build the execution plan. - self.last_plan = plan = self.compile(named_inputs.keys(), outputs) - - # start with fresh data solution. 
- solution = dict(named_inputs) - - plan.execute(solution, overwrites_collector, method) - - if outputs: - # Filter outputs to just return what's requested. - # Otherwise, return the whole solution as output, - # including input and intermediate data nodes. - # Still needed with eviction to clean isolated given inputs. - solution = dict(i for i in solution.items() if i[0] in outputs) - - return solution - finally: - locals() # to update locals-dict handed to jetsam() + try: + if isinstance(outputs, str): + outputs = [outputs] + elif not isinstance(outputs, (list, tuple)) and outputs is not None: + raise ValueError( + "The outputs argument must be a list, was: %s", outputs + ) + + # Build the execution plan. + self.last_plan = plan = self.compile(named_inputs.keys(), outputs) + + # start with fresh data solution. + solution = dict(named_inputs) + + plan.execute(solution, overwrites_collector, method) + + if outputs: + # Filter outputs to just return what's requested. + # Otherwise, return the whole solution as output, + # including input and intermediate data nodes. + # Still needed with eviction to clean isolated given inputs. + solution = dict(i for i in solution.items() if i[0] in outputs) + + return solution + except Exception as ex: + jetsam(ex, locals(), "plan", "solution", "outputs", network="self") diff --git a/test/test_base.py b/test/test_base.py index 8b0419fa..031532ea 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -9,22 +9,14 @@ from graphkit import base, network, operation -def test_jetsam_without_failure(caplog): - caplog.set_level(logging.INFO) - with pytest.raises(AssertionError, match="No `salvage_mappings`"): - with base.jetsam({}): - pytest.xfail("Jetsam did not detect bad inputs!") - - assert "No-op jetsam! 
Call" not in caplog.text - assert "Supressed error" not in caplog.text - - @pytest.mark.parametrize("locs", [None, (), [], [0], "bad"]) def test_jetsam_bad_locals(locs, caplog): caplog.set_level(logging.INFO) with pytest.raises(AssertionError, match="Bad `locs`") as excinfo: - with base.jetsam(locs, a="a"): + try: raise Exception() + except Exception as ex: + base.jetsam(ex, locs, a="a") assert not hasattr(excinfo.value, "graphkit_jetsam") assert "Supressed error while annotating exception" not in caplog.text @@ -34,8 +26,10 @@ def test_jetsam_bad_locals(locs, caplog): def test_jetsam_bad_keys(keys, caplog): caplog.set_level(logging.INFO) with pytest.raises(AssertionError, match="Bad `salvage_mappings`") as excinfo: - with base.jetsam({}, **keys): + try: raise Exception("ABC") + except Exception as ex: + base.jetsam(ex, {}, **keys) assert not hasattr(excinfo.value, "graphkit_jetsam") assert "Supressed error while annotating exception" not in caplog.text @@ -45,8 +39,10 @@ def test_jetsam_bad_keys(keys, caplog): def test_jetsam_bad_locals_given(locs, caplog): caplog.set_level(logging.INFO) with pytest.raises(AssertionError, match="Bad `locs`") as excinfo: - with base.jetsam(locs, a="a"): + try: raise Exception("ABC") + except Exception as ex: + base.jetsam(ex, locs, a="a") assert not hasattr(excinfo.value, "graphkit_jetsam") assert "Supressed error while annotating exception" not in caplog.text @@ -56,10 +52,12 @@ def test_jetsam_bad_locals_given(locs, caplog): def test_jetsam_bad_existing_annotation(annotation, caplog): caplog.set_level(logging.INFO) with pytest.raises(Exception, match="ABC") as excinfo: - with base.jetsam({}, a="a"): + try: ex = Exception("ABC") ex.graphkit_jetsam = annotation raise ex + except Exception as ex: + base.jetsam(ex, {}, a="a") assert excinfo.value.graphkit_jetsam == {"a": None} assert "Supressed error while annotating exception" not in caplog.text @@ -67,9 +65,10 @@ def test_jetsam_bad_existing_annotation(annotation, caplog): def 
test_jetsam_dummy_locals(caplog): with pytest.raises(Exception, match="ABC") as excinfo: - with base.jetsam({"a": 1}, a="a", bad="bad"): - + try: raise Exception("ABC") + except Exception as ex: + base.jetsam(ex, {"a": 1}, a="a", bad="bad") assert isinstance(excinfo.value.graphkit_jetsam, dict) assert excinfo.value.graphkit_jetsam == {"a": 1, "bad": None} @@ -82,13 +81,12 @@ def _scream(*args, **kwargs): def _jetsamed_fn(*args, **kwargs): b = 1 - with base.jetsam(locals(), a="a", b="b"): - try: - a = 1 - b = 2 - _scream() - finally: - locals() + try: + a = 1 + b = 2 + _scream() + except Exception as ex: + base.jetsam(ex, locals(), a="a", b="b") def test_jetsam_locals_simple(caplog): @@ -100,23 +98,21 @@ def test_jetsam_locals_simple(caplog): def test_jetsam_nested(): def inner(): - with base.jetsam(locals(), fn="fn"): - try: - a = 0 - fn = "inner" - _jetsamed_fn() - finally: - locals() + try: + a = 0 + fn = "inner" + _jetsamed_fn() + except Exception as ex: + base.jetsam(ex, locals(), fn="fn") def outer(): - with base.jetsam(locals(), fn="fn"): - try: + try: - fn = "outer" - b = 0 - inner() - finally: - locals() + fn = "outer" + b = 0 + inner() + except Exception as ex: + base.jetsam(ex, locals(), fn="fn") with pytest.raises(Exception, match="ABC") as excinfo: outer() @@ -162,6 +158,7 @@ def test_jetsam_sites_screaming_func(acallable, expected_jetsam): ex = excinfo.value assert set(ex.graphkit_jetsam.keys()) == set(expected_jetsam) + @pytest.mark.parametrize( "acallable, expected_jetsam", [ @@ -182,9 +179,7 @@ def test_jetsam_sites_screaming_func(acallable, expected_jetsam): ["plan"], ), ( - fnt.partial( - network.Network().compute, named_inputs=None, outputs=None - ), + fnt.partial(network.Network().compute, named_inputs=None, outputs=None), "network plan solution outputs".split(), ), ], From 362dacdc55eff6da5f3983f9bd23188ee54b02ee Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 15 Oct 2019 14:20:16 +0300 Subject: [PATCH 164/167] FIX(net): 
iterate_my_self err when droping isolates --- graphkit/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index eed0eaa4..63dc0c12 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -77,7 +77,7 @@ from networkx import DiGraph from . import plot -from .base import jetsam, Operation +from .base import Operation, jetsam from .modifiers import optional, sideffect log = logging.getLogger(__name__) @@ -553,7 +553,7 @@ def _prune_graph(self, outputs, inputs): broken_dag.remove_edges_from(broken_edges) # Drop stray input values and operations (if any). - broken_dag.remove_nodes_from(nx.isolates(broken_dag)) + broken_dag.remove_nodes_from(list(nx.isolates(broken_dag))) if outputs: # If caller requested specific outputs, we can prune any From 74376438a298179f929f86f8360d6e4a46217dc3 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 15 Oct 2019 14:26:41 +0300 Subject: [PATCH 165/167] TEST(plan): new check-multithreading-TC FAILS(!) ... due to shared `Plan.executed`. + fix: enable forgotten check in overrides-TC. 
--- test/test_graphkit.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 065930b6..56fdfe12 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -34,6 +34,11 @@ def filtdict(d, *keys): return type(d)(i for i in d.items() if i[0] in keys) +def abspow(a, p): + c = abs(a) ** p + return c + + def test_network_smoke(): # Sum operation, late-bind compute function @@ -170,10 +175,6 @@ def test_network_deep_merge(): def test_network_merge_in_doctests(): - def abspow(a, p): - c = abs(a) ** p - return c - graphop = compose(name="graphop")( operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), @@ -310,7 +311,7 @@ def test_pruning_not_overrides_given_intermediate(): pipeline.set_execution_method("parallel") overwrites = {} pipeline.set_overwrites_collector(overwrites) - # assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") + assert pipeline(inputs, ["asked"]) == filtdict(exp, "asked") assert overwrites == {} # unjust must have been pruned overwrites = {} @@ -778,6 +779,31 @@ def count_evictions(steps): assert count_evictions(steps12) != count_evictions(steps22) +def test_multithreading_plan_execution(): + # From Huygn's test-code given in yahoo/graphkit#31 + from multiprocessing.dummy import Pool + from graphkit import compose, operation + + # Compose the mul, sub, and abspow operations into a computation graph. 
+ graph = compose(name="graph")( + operation(name="mul1", needs=["a", "b"], provides=["ab"])(mul), + operation(name="sub1", needs=["a", "ab"], provides=["a_minus_ab"])(sub), + operation( + name="abspow1", + needs=["a_minus_ab"], + provides=["abs_a_minus_ab_cubed"], + params={"p": 3}, + )(abspow), + ) + + pool = Pool(10) + graph.set_execution_method("parallel") + pool.map( + lambda i: graph({"a": 2, "b": 5}, ["a_minus_ab", "abs_a_minus_ab_cubed"]), + range(100), + ) + + @pytest.mark.slow def test_parallel_execution(): import time From 313b630b32c423627129745383ee43c946cd8acc Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 15 Oct 2019 14:45:13 +0300 Subject: [PATCH 166/167] FIX(NET,PLAN): MULTITHREAD works with executed LOCAL-VAR, ... Note that plotting `executed` has not been undone, it is not receiving ever any set now. --- graphkit/network.py | 38 +++++++++++++++++--------------------- test/test_base.py | 4 ++-- test/test_graphkit.py | 2 -- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/graphkit/network.py b/graphkit/network.py index 63dc0c12..fed109d8 100644 --- a/graphkit/network.py +++ b/graphkit/network.py @@ -142,8 +142,7 @@ def __repr__(self): class ExecutionPlan( - namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps executed"), - plot.Plotter, + namedtuple("_ExePlan", "net inputs outputs dag broken_edges steps"), plot.Plotter ): """ The result of the network's compilation phase. @@ -170,8 +169,6 @@ class ExecutionPlan( The tuple of operation-nodes & *instructions* needed to evaluate the given inputs & asked outputs, free memory and avoid overwritting any given intermediate inputs. - :ivar executed: - An empty set to collect all operations that have been executed so far. 
""" @property @@ -189,7 +186,6 @@ def _build_pydot(self, **kws): "steps": self.steps, "inputs": self.inputs, "outputs": self.outputs, - "executed": self.executed, "edge_props": { e: {"color": "wheat", "penwidth": 2} for e in self.broken_edges }, @@ -215,7 +211,7 @@ def get_data_node(self, name): if isinstance(node, _DataNode): return node - def _can_schedule_operation(self, op): + def _can_schedule_operation(self, op, executed): """ Determines if a Operation is ready to be scheduled for execution @@ -223,6 +219,8 @@ def _can_schedule_operation(self, op): :param op: The Operation object to check + :param set executed: + An empty set to collect all operations that have been executed so far. :return: A boolean indicating whether the operation may be scheduled for execution based on what has already been executed. @@ -232,9 +230,9 @@ def _can_schedule_operation(self, op): dependencies = set( n for n in nx.ancestors(self.broken_dag, op) if isinstance(n, Operation) ) - return dependencies.issubset(self.executed) + return dependencies.issubset(executed) - def _can_evict_value(self, name): + def _can_evict_value(self, name, executed): """ Determines if a _DataNode is ready to be evicted from solution. @@ -247,7 +245,7 @@ def _can_evict_value(self, name): # Use `broken_dag` not to block a successor waiting for this data, # since in any case will use a given input, not some pipe of this data. return data_node and set(self.broken_dag.successors(data_node)).issubset( - self.executed + executed ) def _pin_data_in_solution(self, value_name, solution, inputs, overwrites): @@ -265,7 +263,7 @@ def _call_operation(self, op, solution): jetsam(ex, locals(), plan="self") def _execute_thread_pool_barrier_method( - self, inputs, solution, overwrites, thread_pool_size=10 + self, inputs, solution, overwrites, executed, thread_pool_size=10 ): """ This method runs the graph using a parallel pool of thread executors. 
@@ -290,15 +288,15 @@ def _execute_thread_pool_barrier_method( for node in self.steps: if ( isinstance(node, Operation) - and self._can_schedule_operation(node) - and node not in self.executed + and node not in executed + and self._can_schedule_operation(node, executed) ): upnext.append(node) elif isinstance(node, _EvictInstruction): # Only evict if all successors for the data node # have been executed. # An optional need may not have a value in the solution. - if node in solution and self._can_evict_value(node): + if node in solution and self._can_evict_value(node, executed): log.debug("removing data '%s' from solution.", node) del solution[node] elif isinstance(node, _PinInstruction): @@ -319,9 +317,9 @@ def _execute_thread_pool_barrier_method( for op, result in done_iterator: solution.update(result) - self.executed.add(op) + executed.add(op) - def _execute_sequential_method(self, inputs, solution, overwrites): + def _execute_sequential_method(self, inputs, solution, overwrites, executed): """ This method runs the graph one operation at a time in a single thread """ @@ -340,7 +338,7 @@ def _execute_sequential_method(self, inputs, solution, overwrites): # add outputs to solution solution.update(layer_outputs) - self.executed.add(step) + executed.add(step) # record execution time t_complete = round(time.time() - t0, 5) @@ -370,9 +368,6 @@ def execute(self, solution, overwrites=None, method=None): because they were "pinned" by input vaules. If missing, the overwrites values are simply discarded. """ - # Clean executed operation from any previous execution. 
- self.executed.clear() - # choose a method of execution executor = ( self._execute_thread_pool_barrier_method @@ -380,8 +375,10 @@ def execute(self, solution, overwrites=None, method=None): else self._execute_sequential_method ) + executed = set() + # clone and keep orignal inputs in solution intact - executor(dict(solution), solution, overwrites) + executor(dict(solution), solution, overwrites, executed) # return it, but caller can also see the results in `solution` dict. return solution @@ -682,7 +679,6 @@ def compile(self, inputs=(), outputs=()): pruned_dag, tuple(broken_edges), tuple(steps), - executed=iset(), ) # Cache compilation results to speed up future runs diff --git a/test/test_base.py b/test/test_base.py index 031532ea..9f457a5a 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -141,7 +141,7 @@ class Op: ), ( fnt.partial( - network.ExecutionPlan(*([None] * 7))._call_operation, + network.ExecutionPlan(*([None] * 6))._call_operation, op=screaming_dumy_op(), solution={}, ), @@ -172,7 +172,7 @@ def test_jetsam_sites_screaming_func(acallable, expected_jetsam): ), ( fnt.partial( - network.ExecutionPlan(*([None] * 7))._call_operation, + network.ExecutionPlan(*([None] * 6))._call_operation, op=None, solution={}, ), diff --git a/test/test_graphkit.py b/test/test_graphkit.py index 56fdfe12..035c567a 100644 --- a/test/test_graphkit.py +++ b/test/test_graphkit.py @@ -591,12 +591,10 @@ def test_sideffect_no_real_data(bools): sol = graph({"box": [0], "a": True}) # Nothing run if no sideffect inputs given. - assert not graph.net.last_plan.executed assert sol == {"box": [0], "a": True} # Nothing run if no sideffect inputs given. 
sol = graph({"box": [0], "a": True}, outputs=["box", sideffect("b")]) - assert not graph.net.last_plan.executed assert sol == {"box": [0]} ## OK INPUT SIDEFFECTS From 7658e17fa995aba3a7d09282951bd284ec2a3cc0 Mon Sep 17 00:00:00 2001 From: Kostis Anagnostopoulos Date: Tue, 15 Oct 2019 14:51:04 +0300 Subject: [PATCH 167/167] doc: number sections, add titles for modifiers --- docs/source/index.rst | 3 ++- docs/source/operations.rst | 6 ++++++ docs/source/reference.rst | 5 +++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index ca25f433..a245dd17 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -25,7 +25,8 @@ It might be of use in computer vision, machine learning and other data science d or become the core of a custom ETL pipelne. .. toctree:: - :maxdepth: 2 + :maxdepth: 3 + :numbered: operations composition diff --git a/docs/source/operations.rst b/docs/source/operations.rst index f3152134..1c175131 100644 --- a/docs/source/operations.rst +++ b/docs/source/operations.rst @@ -137,5 +137,11 @@ Modifiers on ``operation`` inputs and outputs Certain modifiers are available to apply to input or output values in ``needs`` and ``provides``, for example to designate an optional input. These modifiers are available in the ``graphkit.modifiers`` module: + +Optionals +^^^^^^^^^ .. autoclass:: graphkit.modifiers.optional + +Sideffects +^^^^^^^^^^ .. autoclass:: graphkit.modifiers.sideffect diff --git a/docs/source/reference.rst b/docs/source/reference.rst index a64b29c7..1eb28d7c 100644 --- a/docs/source/reference.rst +++ b/docs/source/reference.rst @@ -2,6 +2,11 @@ API Reference ============= +Package: `graphkit` +=================== + +.. automodule:: graphkit + Module: `base` ==============