diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..42391e5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - repo: https://github.com/PyCQA/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/mirrors-isort + rev: v5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..5941ec3 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,231 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['../qvc/pricing/pricing/data/assets.py']\n" + ] + } + ], + "source": [ + "import pyan as p\n", + "from glob import glob\n", + "import importlib\n", + "importlib.reload(p)\n", + "\n", + "filenames = glob(f\"../qvc/pricing/pricing/data/assets.py\", recursive=True)\n", + "print(filenames)\n", + "import logging\n", + "logging.basicConfig(level=logging.ERROR)\n", + "visitor = p.analyzer.CallGraphVisitor(filenames, logging.getLogger())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_related_nodes(visitor, node, namespace=\"pricing\", i=10):\n", + " new_nodes = [node]\n", + " if i < 0:\n", + " return new_nodes\n", + "\n", + " for n in visitor.uses_edges.get(node, []):\n", + " if n in visitor.uses_edges and n not in new_nodes and n.namespace.startswith(namespace):\n", + " 
new_nodes.extend(get_related_nodes(visitor, n, namespace=namespace, i=i - 1))\n", + "\n", + " for n in visitor.defines_edges.get(node, []):\n", + " if n in visitor.defines_edges and n not in new_nodes and n.namespace.startswith(namespace):\n", + " new_nodes.extend(get_related_nodes(visitor, n, namespace=namespace, i=i - 1))\n", + " return new_nodes\n", + "\n", + "node = [\n", + " n\n", + " for n in visitor.uses_edges.keys()\n", + " if repr(n.flavor) == \"function\" and n.namespace.startswith(\"pricing.data.assets\")\n", + " ][1]\n", + "node\n", + "get_related_nodes(visitor, node)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{: None,\n", + " : None,\n", + " : None,\n", + " : None,\n", + " : None,\n", + " : None}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "{n: n.namespace for n in visitor.uses_edges[node]}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;34m{\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamespace\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mvisitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefines_edges\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m: " + ] + } + ], + "source": [ + "{n: n.namespace for n in visitor.defines_edges[node]}" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'func': <_ast.Attribute object at 0x7fc0e18d7048>, 'args': [<_ast.Name object at 0x7fc0e18d70b8>], 'keywords': [], 'lineno': 285, 'col_offset': 8}\n" + ] + } + ], + "source": [ + "def print_func(f):\n", + " if isinstance(f, list):\n", + " for s in f:\n", + " print_func(s)\n", + " else:\n", + " print(f.__dict__)\n", + "print_func(node.ast_node.body[2].value.func.value)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['../qvc/pricing/pricing/data/assets.py']" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node.ast_node" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvisitor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_node_of_current_namespace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Documents/Github/pyan/pyan/analyzer.py\u001b[0m in \u001b[0;36mget_node_of_current_namespace\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 
1105\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mno\u001b[0m \u001b[0massociated\u001b[0m \u001b[0mAST\u001b[0m \u001b[0mnode\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1106\u001b[0m \"\"\"\n\u001b[0;32m-> 1107\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_stack\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# name_stack should never be empty (always at least module name)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1108\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1109\u001b[0m \u001b[0mnamespace\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'.'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname_stack\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "visitor.get_node_of_current_namespace()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/modvis.py b/modvis.py index d9300b1..e048b7f 100644 --- a/modvis.py +++ b/modvis.py @@ -3,10 +3,10 @@ """A simple import analyzer. 
Visualize dependencies between modules.""" import ast -import os -import logging from glob import glob +import logging from optparse import OptionParser # TODO: migrate to argparse +import os import pyan.node import pyan.visgraph diff --git a/pyan/__init__.py b/pyan/__init__.py index faba830..2f76619 100644 --- a/pyan/__init__.py +++ b/pyan/__init__.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from typing import Union, List -import io from glob import glob +import io +from typing import List, Union -from .main import main # noqa: F401, for export only. from .analyzer import CallGraphVisitor -from .writers import SVGWriter, HTMLWriter, DotWriter +from .main import main # noqa: F401, for export only. from .visgraph import VisualGraph +from .writers import DotWriter, HTMLWriter, SVGWriter __version__ = "1.1.2" diff --git a/pyan/analyzer.py b/pyan/analyzer.py index df112bf..fc83874 100644 --- a/pyan/analyzer.py +++ b/pyan/analyzer.py @@ -2,16 +2,23 @@ # -*- coding: utf-8 -*- """The AST visitor.""" -import logging import ast +import logging import symtable from typing import Union -from .node import Node, Flavor -from .anutils import tail, get_module_name, format_alias, \ - get_ast_node_name, sanitize_exprs, \ - resolve_method_resolution_order, \ - Scope, ExecuteInInnerScope, UnresolvedSuperCallError +from .anutils import ( + ExecuteInInnerScope, + Scope, + UnresolvedSuperCallError, + format_alias, + get_ast_node_name, + get_module_name, + resolve_method_resolution_order, + sanitize_exprs, + tail, +) +from .node import Flavor, Node # TODO: add Cython support (strip type annotations in a preprocess step, then treat as Python) # TODO: built-in functions (range(), enumerate(), zip(), iter(), ...): @@ -33,6 +40,8 @@ # https://docs.python.org/2/library/compiler.html#module-compiler.ast # https://docs.python.org/3/library/ast.html#abstract-grammar # + + class CallGraphVisitor(ast.NodeVisitor): """A visitor that can be walked over a Python AST, and will 
derive information about the objects in the AST and how they use each other. @@ -55,7 +64,7 @@ def __init__(self, filenames, logger=None): # data gathered from analysis self.defines_edges = {} self.uses_edges = {} - self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) + self.nodes = {} # Node name: list of Node objects (in possibly different namespaces) self.scopes = {} # fully qualified name of namespace: Scope object self.class_base_ast_nodes = {} # pass 1: class Node: list of AST nodes @@ -87,7 +96,10 @@ def process(self): def process_one(self, filename): """Analyze the specified Python source file.""" if filename not in self.filenames: - raise ValueError("Filename '%s' has not been preprocessed (was not given to __init__, which got %s)" % (filename, self.filenames)) + raise ValueError( + "Filename '%s' has not been preprocessed (was not given to __init__, which got %s)" + % (filename, self.filenames) + ) with open(filename, "rt", encoding="utf-8") as f: content = f.read() self.filename = filename @@ -166,12 +178,7 @@ def resolve_imports(self): """ # first find all imports and map to themselves. 
we will then remap those that are currently pointing # to duplicates or into the void - imports_to_resolve = { - n - for items in self.nodes.values() - for n in items - if n.flavor == Flavor.IMPORTEDITEM - } + imports_to_resolve = {n for items in self.nodes.values() for n in items if n.flavor == Flavor.IMPORTEDITEM} # map real definitions import_mapping = {} while len(imports_to_resolve) > 0: @@ -218,21 +225,16 @@ def resolve_imports(self): import_mapping.update(attribute_import_mapping) # remap nodes based on import mapping - self.nodes = { - name: [import_mapping.get(n, n) for n in items] - for name, items in self.nodes.items() - } + self.nodes = {name: [import_mapping.get(n, n) for n in items] for name, items in self.nodes.items()} self.uses_edges = { - import_mapping.get(from_node, from_node): { - import_mapping.get(to_node, to_node) for to_node in to_nodes - } - for from_node, to_nodes in self.uses_edges.items() if len(to_nodes) > 0 + import_mapping.get(from_node, from_node): {import_mapping.get(to_node, to_node) for to_node in to_nodes} + for from_node, to_nodes in self.uses_edges.items() + if len(to_nodes) > 0 } self.defines_edges = { - import_mapping.get(from_node, from_node): { - import_mapping.get(to_node, to_node) for to_node in to_nodes - } - for from_node, to_nodes in self.defines_edges.items() if len(to_nodes) > 0 + import_mapping.get(from_node, from_node): {import_mapping.get(to_node, to_node) for to_node in to_nodes} + for from_node, to_nodes in self.defines_edges.items() + if len(to_nodes) > 0 } def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000): @@ -251,9 +253,7 @@ def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = N # filter the nodes to avoid cluttering the callgraph with irrelevant information filtered_nodes = self.get_related_nodes(node, namespace=namespace, max_iter=max_iter) - self.nodes = { - name: [node for node in nodes if node in filtered_nodes] for name, 
nodes in self.nodes.items() - } + self.nodes = {name: [node for node in nodes if node in filtered_nodes] for name, nodes in self.nodes.items()} self.uses_edges = { node: {n for n in nodes if n in filtered_nodes} for node, nodes in self.uses_edges.items() @@ -266,7 +266,9 @@ def filter(self, node: Union[None, Node] = None, namespace: Union[str, None] = N } return self - def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000) -> set: + def get_related_nodes( + self, node: Union[None, Node] = None, namespace: Union[str, None] = None, max_iter: int = 1000 + ) -> set: """ get nodes that related to `node` or are in `namespace` @@ -286,7 +288,9 @@ def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str new_nodes = {n for items in self.nodes.values() for n in items} else: new_nodes = { - n for items in self.nodes.values() for n in items + n + for items in self.nodes.values() + for n in items if n.namespace is not None and namespace in n.namespace } @@ -318,9 +322,7 @@ def get_related_nodes(self, node: Union[None, Node] = None, namespace: Union[str [ n for n in self.defines_edges.get(item, []) - if n in self.defines_edges - and n not in new_nodes - and namespace in n.namespace + if n in self.defines_edges and n not in new_nodes and namespace in n.namespace ] ) @@ -330,7 +332,7 @@ def visit_Module(self, node): self.logger.debug("Module %s, %s" % (self.module_name, self.filename)) # Modules live in the top-level namespace, ''. - module_node = self.get_node('', self.module_name, node, flavor=Flavor.MODULE) + module_node = self.get_node("", self.module_name, node, flavor=Flavor.MODULE) self.associate_node(module_node, node, filename=self.filename) ns = self.module_name @@ -487,13 +489,13 @@ def generate_args_nodes(self, ast_args, inner_ns): # is not a valid Python identifier. # # It has no sensible flavor, so we leave its flavor unspecified. 
- nonsense_node = self.get_node(inner_ns, '^^^argument^^^', None) + nonsense_node = self.get_node(inner_ns, "^^^argument^^^", None) # args, vararg (*args), kwonlyargs, kwarg (**kwargs) for a in ast_args.args: # positional sc.defs[a.arg] = nonsense_node if ast_args.vararg is not None: # *args if present sc.defs[ast_args.vararg] = nonsense_node - for a in ast_args.kwonlyargs: # any after *args or * + for a in ast_args.kwonlyargs: # any after *args or * sc.defs[a.arg] = nonsense_node if ast_args.kwarg is not None: # **kwargs if present sc.defs[ast_args.kwarg] = nonsense_node @@ -529,7 +531,10 @@ def visit_Import(self, node): self.analyze_module_import(import_item, node) def visit_ImportFrom(self, node): - self.logger.debug("ImportFrom: from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom: from %s import %s, %s:%s" + % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) # Pyan needs to know the package structure, and how the program # being analyzed is actually going to be invoked (!), to be able to # resolve relative imports correctly. @@ -537,17 +542,29 @@ def visit_ImportFrom(self, node): # As a solution, we register imports here and later, when all files have been parsed, resolve them. from_node = self.get_node_of_current_namespace() if node.module is None: # resolve relative imports 'None' such as "from . import foo" - self.logger.debug("ImportFrom (original) from %s import %s, %s:%s" % ('.' * node.level, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom (original) from %s import %s, %s:%s" + % ("." 
* node.level, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) tgt_level = node.level - current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] + current_module_namespace = self.module_name.rsplit(".", tgt_level)[0] tgt_name = current_module_namespace - self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom (resolved): from %s import %s, %s:%s" + % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) elif node.level != 0: # resolve from ..module import foo - self.logger.debug("ImportFrom (original): from %s import %s, %s:%s" % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno)) + self.logger.debug( + "ImportFrom (original): from %s import %s, %s:%s" + % (node.module, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) tgt_level = node.level - current_module_namespace = self.module_name.rsplit('.', tgt_level)[0] - tgt_name = current_module_namespace + '.' + node.module - self.logger.debug("ImportFrom (resolved): from %s import %s, %s:%s" % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno)) + current_module_namespace = self.module_name.rsplit(".", tgt_level)[0] + tgt_name = current_module_namespace + "." + node.module + self.logger.debug( + "ImportFrom (resolved): from %s import %s, %s:%s" + % (tgt_name, [format_alias(x) for x in node.names], self.filename, node.lineno) + ) else: tgt_name = node.module # normal from module.submodule import foo @@ -555,11 +572,9 @@ def visit_ImportFrom(self, node): for alias in node.names: # check if import is module if tgt_name + "." + alias.name in self.module_to_filename: - to_node = self.get_node('', tgt_name + "." + alias.name, node, flavor=Flavor.MODULE) + to_node = self.get_node("", tgt_name + "." 
+ alias.name, node, flavor=Flavor.MODULE) else: - to_node = self.get_node( - tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM - ) + to_node = self.get_node(tgt_name, alias.name, node, flavor=Flavor.IMPORTEDITEM) # if there is alias, add extra edge between alias and node if alias.asname is not None: alias_name = alias.asname @@ -587,17 +602,14 @@ def analyze_module_import(self, import_item, ast_node): # where it is being imported to, i.e. the **user** from_node = self.get_node_of_current_namespace() # the thing **being used** (under the asname, if any) - mod_node = self.get_node('', src_name, ast_node, flavor=Flavor.MODULE) + mod_node = self.get_node("", src_name, ast_node, flavor=Flavor.MODULE) # if there is alias, add extra edge between alias and node if import_item.asname is not None: alias_name = import_item.asname else: alias_name = mod_node.name self.add_uses_edge(from_node, mod_node) - self.logger.info( - "New edge added for Use import %s in %s" - % (mod_node, from_node) - ) + self.logger.info("New edge added for Use import %s in %s" % (mod_node, from_node)) self.set_value(alias_name, mod_node) # set node to be discoverable in module self.logger.info("From setting name %s to %s" % (alias_name, mod_node)) @@ -620,7 +632,9 @@ def visit_Constant(self, node): # attribute access (node.ctx determines whether set (ast.Store) or get (ast.Load)) def visit_Attribute(self, node): objname = get_ast_node_name(node.value) - self.logger.debug("Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno)) + self.logger.debug( + "Attribute %s of %s in context %s, %s:%s" % (node.attr, objname, type(node.ctx), self.filename, node.lineno) + ) # TODO: self.last_value is a hack. 
Handle names in store context (LHS) # in analyze_binding(), so that visit_Attribute() only needs to handle @@ -630,7 +644,7 @@ def visit_Attribute(self, node): new_value = self.last_value try: if self.set_attribute(node, new_value): - self.logger.info('setattr %s on %s to %s' % (node.attr, objname, new_value)) + self.logger.info("setattr %s on %s to %s" % (node.attr, objname, new_value)) except UnresolvedSuperCallError: # Trying to set something belonging to an unresolved super() # of something; just ignore this attempt to setattr. @@ -646,7 +660,7 @@ def visit_Attribute(self, node): # Both object and attr known. if isinstance(attr_node, Node): - self.logger.info('getattr %s on %s returns %s' % (node.attr, objname, attr_node)) + self.logger.info("getattr %s on %s returns %s" % (node.attr, objname, attr_node)) # add uses edge from_node = self.get_node_of_current_namespace() @@ -680,9 +694,15 @@ def visit_Attribute(self, node): from_node = self.get_node_of_current_namespace() ns = obj_node.get_name() # fully qualified namespace **of attr** to_node = self.get_node(ns, tgt_name, node, flavor=Flavor.ATTRIBUTE) - self.logger.debug("Use from %s to %s (target obj %s known but target attr %s not resolved; maybe fwd ref or unanalyzed import)" % (from_node, to_node, obj_node, node.attr)) + self.logger.debug( + f"Use from {from_node} to {to_node} (target obj {obj_node} known but target attr " + f"{node.attr} not resolved; maybe fwd ref or unanalyzed import)" + ) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (target obj %s known but target attr %s not resolved; maybe fwd ref or unanalyzed import)" % (from_node, to_node, obj_node, node.attr)) + self.logger.info( + f"New edge added for Use from {from_node} to {to_node} (target obj {obj_node} known but " + f"target attr {node.attr} not resolved; maybe fwd ref or unanalyzed import)" + ) # remove resolved wildcard from current site to self.remove_wild(from_node, obj_node, 
node.attr) @@ -700,7 +720,6 @@ def visit_Name(self, node): # TODO: self.last_value is a hack. Handle names in store context (LHS) # in analyze_binding(), so that visit_Name() only needs to handle # the load context (i.e. detect uses of the name). - # if isinstance(node.ctx, ast.Store): # when we get here, self.last_value has been set by visit_Assign() self.set_value(node.id, self.last_value) @@ -711,8 +730,8 @@ def visit_Name(self, node): to_node = self.get_value(tgt_name) # resolves "self" if needed current_class = self.get_current_class() if current_class is None or to_node is not current_class: # add uses edge only if not pointing to "self" - ###TODO if the name is a local variable (i.e. in the innermost scope), and - ###has no known value, then don't try to create a Node for it. + # TODO if the name is a local variable (i.e. in the innermost scope), and + # has no known value, then don't try to create a Node for it. if not isinstance(to_node, Node): # namespace=None means we don't know the namespace yet to_node = self.get_node(None, tgt_name, node, flavor=Flavor.UNKNOWN) @@ -738,9 +757,15 @@ def visit_Assign(self, node): values = sanitize_exprs(node.value) # values is the same for each set of targets for targets in node.targets: targets = sanitize_exprs(targets) - self.logger.debug("Assign %s %s, %s:%s" % ([get_ast_node_name(x) for x in targets], - [get_ast_node_name(x) for x in values], - self.filename, node.lineno)) + self.logger.debug( + "Assign %s %s, %s:%s" + % ( + [get_ast_node_name(x) for x in targets], + [get_ast_node_name(x) for x in values], + self.filename, + node.lineno, + ) + ) self.analyze_binding(targets, values) def visit_AnnAssign(self, node): # PEP 526, Python 3.6+ @@ -748,13 +773,15 @@ def visit_AnnAssign(self, node): # PEP 526, Python 3.6+ self.last_value = None if node.value is not None: value = sanitize_exprs(node.value) - self.logger.debug("AnnAssign %s %s, %s:%s" % (get_ast_node_name(target[0]), - get_ast_node_name(value[0]), - 
self.filename, node.lineno)) + self.logger.debug( + "AnnAssign %s %s, %s:%s" + % (get_ast_node_name(target[0]), get_ast_node_name(value[0]), self.filename, node.lineno) + ) self.analyze_binding(target, value) else: # just a type declaration - self.logger.debug("AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), - self.filename, node.lineno)) + self.logger.debug( + "AnnAssign %s , %s:%s" % (get_ast_node_name(target[0]), self.filename, node.lineno) + ) self.last_value = None self.visit(target[0]) # TODO: use the type annotation from node.annotation? @@ -764,10 +791,16 @@ def visit_AugAssign(self, node): targets = sanitize_exprs(node.target) values = sanitize_exprs(node.value) # values is the same for each set of targets - self.logger.debug("AugAssign %s %s %s, %s:%s" % ([get_ast_node_name(x) for x in targets], - type(node.op), - [get_ast_node_name(x) for x in values], - self.filename, node.lineno)) + self.logger.debug( + "AugAssign %s %s %s, %s:%s" + % ( + [get_ast_node_name(x) for x in targets], + type(node.op), + [get_ast_node_name(x) for x in values], + self.filename, + node.lineno, + ) + ) # TODO: maybe no need to handle tuple unpacking in AugAssign? 
(but simpler to use the same implementation) self.analyze_binding(targets, values) @@ -849,8 +882,7 @@ def analyze_comprehension(self, node, label, field1="elt", field2=None): self.visit(getattr(node, field2)) def visit_Call(self, node): - self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), - self.filename, node.lineno)) + self.logger.debug("Call %s, %s:%s" % (get_ast_node_name(node.func), self.filename, node.lineno)) # visit args to detect uses for arg in node.args: @@ -871,7 +903,9 @@ def visit_Call(self, node): to_node = result_node self.logger.debug("Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node)) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node)) + self.logger.info( + "New edge added for Use from %s to %s (via resolved call to built-ins)" % (from_node, to_node) + ) else: # generic function call # Visit the function name part last, so that inside a binding form, @@ -891,10 +925,12 @@ def visit_Call(self, node): if self.last_value in self.class_base_ast_nodes: from_node = self.get_node_of_current_namespace() class_node = self.last_value - to_node = self.get_node(class_node.get_name(), '__init__', None, flavor=Flavor.METHOD) + to_node = self.get_node(class_node.get_name(), "__init__", None, flavor=Flavor.METHOD) self.logger.debug("Use from %s to %s (call creates an instance)" % (from_node, to_node)) if self.add_uses_edge(from_node, to_node): - self.logger.info("New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node)) + self.logger.info( + "New edge added for Use from %s to %s (call creates an instance)" % (from_node, to_node) + ) def visit_With(self, node): self.logger.debug("With (context manager), %s:%s" % (self.filename, node.lineno)) @@ -906,7 +942,7 @@ def add_uses_enter_exit_of(graph_node): withed_obj_node = graph_node self.logger.debug("Use from %s to With %s" % (from_node, 
withed_obj_node)) - for methodname in ('__enter__', '__exit__'): + for methodname in ("__enter__", "__exit__"): to_node = self.get_node(withed_obj_node.get_name(), methodname, None, flavor=Flavor.METHOD) if self.add_uses_edge(from_node, to_node): self.logger.info("New edge added for Use from %s to %s" % (from_node, to_node)) @@ -1036,13 +1072,13 @@ def analyze_binding(self, targets, values): self.last_value = None for tgt, val in zip(targets, captured_values): self.last_value = val - self.visit(tgt) # LHS, name in a store context + self.visit(tgt) # LHS, name in a store context self.last_value = None else: # FIXME: for now, do the wrong thing in the non-trivial case # old code, no tuple unpacking support for value in values: self.visit(value) # set self.last_value to **something** on the RHS and hope for the best - for tgt in targets: # LHS, name in a store context + for tgt in targets: # LHS, name in a store context self.visit(tgt) self.last_value = None @@ -1107,7 +1143,10 @@ def resolve_builtins(self, ast_node): # build a temporary ast.Attribute AST node so that we can use get_attribute() tmp_astnode = ast.Attribute(value=obj_astnode, attr=attrname, ctx=obj_astnode.ctx) obj_node, attr_node = self.get_attribute(tmp_astnode) - self.logger.debug("Resolve %s() of %s: returning attr node %s" % (funcname, get_ast_node_name(obj_astnode), attr_node)) + self.logger.debug( + "Resolve %s() of %s: returning attr node %s" + % (funcname, get_ast_node_name(obj_astnode), attr_node) + ) return attr_node # add implementations for other built-in funcnames here if needed @@ -1127,8 +1166,9 @@ def resolve_attribute(self, ast_node): if not isinstance(ast_node, ast.Attribute): raise TypeError("Expected ast.Attribute; got %s" % (type(ast_node))) - self.logger.debug("Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), - ast_node.attr, type(ast_node.ctx))) + self.logger.debug( + "Resolve %s.%s in context %s" % (get_ast_node_name(ast_node.value), ast_node.attr, 
type(ast_node.ctx)) + ) # Resolve nested attributes # @@ -1173,7 +1213,7 @@ def resolve_attribute(self, ast_node): # The CLASS flavor is the best match, as these constants # are object types. # - obj_node = self.get_node('', tn, None, flavor=Flavor.CLASS) + obj_node = self.get_node("", tn, None, flavor=Flavor.CLASS) # attribute of a function call. Detect cases like super().dostuff() elif isinstance(ast_node.value, ast.Call): @@ -1210,12 +1250,14 @@ def analyze_scopes(self, code, filename): # in different scopes, as we should). # scopes = {} + def process(parent_ns, table): sc = Scope(table) ns = "%s.%s" % (parent_ns, sc.name) if len(sc.name) else parent_ns scopes[ns] = sc for t in table.get_children(): process(ns, t) + process(self.module_name, symtable.symtable(code, filename, compile_type="exec")) # add to existing scopes (while not overwriting any existing definitions with None) @@ -1247,7 +1289,7 @@ def get_node_of_current_namespace(self): """ assert len(self.name_stack) # name_stack should never be empty (always at least module name) - namespace = '.'.join(self.name_stack[0:-1]) + namespace = ".".join(self.name_stack[0:-1]) name = self.name_stack[-1] return self.get_node(namespace, name, None, flavor=Flavor.NAMESPACE) @@ -1268,13 +1310,15 @@ def find_scope(name): if sc is not None: value = sc.defs[name] if isinstance(value, Node): - self.logger.info('Get %s in %s, found in %s, value %s' % (name, self.scope_stack[-1], sc, value)) + self.logger.info("Get %s in %s, found in %s, value %s" % (name, self.scope_stack[-1], sc, value)) return value else: # TODO: should always be a Node or None - self.logger.debug('Get %s in %s, found in %s: value %s is not a Node' % (name, self.scope_stack[-1], sc, value)) + self.logger.debug( + "Get %s in %s, found in %s: value %s is not a Node" % (name, self.scope_stack[-1], sc, value) + ) else: - self.logger.debug('Get %s in %s: no Node value (or name not in scope)' % (name, self.scope_stack[-1])) + self.logger.debug("Get %s in %s: 
no Node value (or name not in scope)" % (name, self.scope_stack[-1])) def set_value(self, name, value): """Set the value of name in the current scope. Value must be a Node.""" @@ -1289,12 +1333,12 @@ def find_scope(name): if sc is not None: if isinstance(value, Node): sc.defs[name] = value - self.logger.info('Set %s in %s to %s' % (name, sc, value)) + self.logger.info("Set %s in %s to %s" % (name, sc, value)) else: # TODO: should always be a Node or None - self.logger.debug('Set %s in %s: value %s is not a Node' % (name, sc, value)) + self.logger.debug("Set %s in %s: value %s is not a Node" % (name, sc, value)) else: - self.logger.debug('Set: name %s not in scope' % (name)) + self.logger.debug("Set: name %s not in scope" % (name)) ########################################################################### # Attribute getter and setter @@ -1451,10 +1495,10 @@ def get_node(self, namespace, name, ast_node=None, flavor=Flavor.UNSPECIFIED): def get_parent_node(self, graph_node): """Get the parent node of the given Node. (Used in postprocessing.)""" - if '.' in graph_node.namespace: - ns, name = graph_node.namespace.rsplit('.', 1) + if "." in graph_node.namespace: + ns, name = graph_node.namespace.rsplit(".", 1) else: - ns, name = '', graph_node.namespace + ns, name = "", graph_node.namespace return self.get_node(ns, name, None) def associate_node(self, graph_node, ast_node, filename=None): @@ -1667,17 +1711,26 @@ def expand_unknowns(self): n.defined = False def cull_inherited(self): - """For each use edge from W to X.name, if it also has an edge to W to Y.name where Y is used by X, then remove the first edge.""" + """ + For each use edge from W to X.name, if W also has an edge to Y.name where + Y is used by X, then remove the first edge. 
+ """ removed_uses_edges = [] for n in self.uses_edges: for n2 in self.uses_edges[n]: inherited = False for n3 in self.uses_edges[n]: - if n3.name == n2.name and n2.namespace is not None and n3.namespace is not None and n3.namespace != n2.namespace: + if ( + n3.name == n2.name + and n2.namespace is not None + and n3.namespace is not None + and n3.namespace != n2.namespace + ): pn2 = self.get_parent_node(n2) pn3 = self.get_parent_node(n3) - # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: # remove the second edge W to Y.name (TODO: add an option to choose this) + # if pn3 in self.uses_edges and pn2 in self.uses_edges[pn3]: + # remove the second edge W to Y.name (TODO: add an option to choose this) if pn2 in self.uses_edges and pn3 in self.uses_edges[pn2]: # remove the first edge W to X.name inherited = True @@ -1698,7 +1751,7 @@ def collapse_inner(self): # BUG: resolve relative imports causes (RuntimeError: dictionary changed size during iteration) # temporary solution is adding list to force a copy of 'self.nodes' for name in list(self.nodes): - if name in ('lambda', 'listcomp', 'setcomp', 'dictcomp', 'genexpr'): + if name in ("lambda", "listcomp", "setcomp", "dictcomp", "genexpr"): for n in self.nodes[name]: pn = self.get_parent_node(n) if n in self.uses_edges: diff --git a/pyan/anutils.py b/pyan/anutils.py index 9cd1bb4..3edd08a 100644 --- a/pyan/anutils.py +++ b/pyan/anutils.py @@ -2,29 +2,33 @@ # -*- coding: utf-8 -*- """Utilities for analyzer.""" -import os.path import ast +import os.path + from .node import Flavor + def head(lst): if len(lst): return lst[0] + def tail(lst): if len(lst) > 1: return lst[1:] else: return [] + def get_module_name(filename): """Try to determine the full module name of a source file, by figuring out if its directory looks like a package (i.e. 
has an __init__.py file).""" - if os.path.basename(filename) == '__init__.py': + if os.path.basename(filename) == "__init__.py": return get_module_name(os.path.dirname(filename)) - init_path = os.path.join(os.path.dirname(filename), '__init__.py') - mod_name = os.path.basename(filename).replace('.py', '') + init_path = os.path.join(os.path.dirname(filename), "__init__.py") + mod_name = os.path.basename(filename).replace(".py", "") if not os.path.exists(init_path): return mod_name @@ -36,7 +40,8 @@ def get_module_name(filename): if not os.path.dirname(filename): return mod_name - return get_module_name(os.path.dirname(filename)) + '.' + mod_name + return get_module_name(os.path.dirname(filename)) + "." + mod_name + def format_alias(x): """Return human-readable description of an ast.alias (used in Import and ImportFrom nodes).""" @@ -48,6 +53,7 @@ def format_alias(x): else: return "%s" % (x.name) + def get_ast_node_name(x): """Return human-readable name of ast.Attribute or ast.Name. Pass through anything else.""" if isinstance(x, ast.Attribute): @@ -58,19 +64,23 @@ def get_ast_node_name(x): else: return x + # Helper for handling binding forms. def sanitize_exprs(exprs): """Convert ast.Tuples in exprs to Python tuples; wrap result in a Python tuple.""" + def process(expr): if isinstance(expr, (ast.Tuple, ast.List)): return expr.elts # .elts is a Python tuple else: return [expr] + if isinstance(exprs, (tuple, list)): return [process(expr) for expr in exprs] else: return process(exprs) + def resolve_method_resolution_order(class_base_nodes, logger): """Compute the method resolution order (MRO) for each of the analyzed classes. 
@@ -85,17 +95,21 @@ class LinearizationImpossible(Exception): from functools import reduce from operator import add + def C3_find_good_head(heads, tails): # find an element of heads which is not in any of the tails flat_tails = reduce(add, tails, []) # flatten the outer level for hd in heads: if hd not in flat_tails: break else: # no break only if there are cyclic dependencies. - raise LinearizationImpossible("MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails)) + raise LinearizationImpossible( + "MRO linearization impossible; cyclic dependency detected. heads: %s, tails: %s" % (heads, tails) + ) return hd def remove_all(elt, lst): # remove all occurrences of elt from lst, return a copy return [x for x in lst if x != elt] + def remove_all_in(elt, lists): # remove elt from all lists, return a copy return [remove_all(elt, lst) for lst in lists] @@ -117,6 +131,7 @@ def C3_merge(lists): mro = {} # result try: memo = {} # caching/memoization + def C3_linearize(node): logger.debug("MRO: C3 linearizing %s" % (node)) seen.add(node) @@ -137,6 +152,7 @@ def C3_linearize(node): memo[node] = [node] + C3_merge(lists) logger.debug("MRO: C3 linearized %s, result %s" % (node, memo[node])) return memo[node] + for node in class_base_nodes: logger.debug("MRO: analyzing class %s" % (node)) seen = set() # break cycles (separately for each class we start from) @@ -150,6 +166,7 @@ def C3_linearize(node): # analyzed is so badly formed that the MRO algorithm fails) memo = {} # caching/memoization + def lookup_bases_recursive(node): seen.add(node) if node not in memo: @@ -170,10 +187,13 @@ def lookup_bases_recursive(node): return mro + class UnresolvedSuperCallError(Exception): """For specifically signaling an unresolved super().""" + pass + class Scope: """Adaptor that makes scopes look somewhat like those from the Python 2 compiler module, as far as Pyan's CallGraphVisitor is concerned.""" @@ -181,15 +201,16 @@ class Scope: def __init__(self, 
table): """table: SymTable instance from symtable.symtable()""" name = table.get_name() - if name == 'top': - name = '' # Pyan defines the top level as anonymous + if name == "top": + name = "" # Pyan defines the top level as anonymous self.name = name self.type = table.get_type() # useful for __repr__() - self.defs = {iden:None for iden in table.get_identifiers()} # name:assigned_value + self.defs = {iden: None for iden in table.get_identifiers()} # name:assigned_value def __repr__(self): return "" % (self.type, self.name) + # A context manager, sort of a friend of CallGraphVisitor (depends on implementation details) class ExecuteInInnerScope: """Execute a code block with the scope stack augmented with an inner scope. diff --git a/pyan/main.py b/pyan/main.py index 8dc54d8..c13ed83 100644 --- a/pyan/main.py +++ b/pyan/main.py @@ -9,13 +9,13 @@ for rendering by e.g. GraphViz or yEd. """ -import logging -from glob import glob from argparse import ArgumentParser +from glob import glob +import logging from .analyzer import CallGraphVisitor from .visgraph import VisualGraph -from .writers import TgfWriter, DotWriter, YedWriter, HTMLWriter, SVGWriter +from .writers import DotWriter, HTMLWriter, SVGWriter, TgfWriter, YedWriter def main(cli_args=None): diff --git a/pyan/node.py b/pyan/node.py index 2f2dbf4..49c8c5b 100644 --- a/pyan/node.py +++ b/pyan/node.py @@ -12,11 +12,12 @@ def make_safe_label(label): out = label for word in unsafe_words: out = out.replace(word, "%sX" % word) - return out.replace('.', '__').replace('*', '') + return out.replace(".", "__").replace("*", "") class Flavor(Enum): """Flavor describes the kind of object a node represents.""" + UNSPECIFIED = "---" # as it says on the tin UNKNOWN = "???" # not determined by analysis (wildcard) @@ -100,7 +101,7 @@ def get_short_name(self): Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." 
+ self.name else: return self.name @@ -108,7 +109,7 @@ def get_annotated_name(self): """Return the short name, plus module and line number of definition site, if available. Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: if self.get_level() >= 1 and self.ast_node is not None: return "%s\\n(%s:%d)" % (self.name, self.filename, self.ast_node.lineno) @@ -119,11 +120,17 @@ def get_long_annotated_name(self): """Return the short name, plus namespace, and module and line number of definition site, if available. Names of unknown nodes will include the *. prefix.""" if self.namespace is None: - return '*.' + self.name + return "*." + self.name else: if self.get_level() >= 1: if self.ast_node is not None: - return "%s\\n\\n(%s:%d,\\n%s in %s)" % (self.name, self.filename, self.ast_node.lineno, repr(self.flavor), self.namespace) + return "%s\\n\\n(%s:%d,\\n%s in %s)" % ( + self.name, + self.filename, + self.ast_node.lineno, + repr(self.flavor), + self.namespace, + ) else: return "%s\\n\\n(%s in %s)" % (self.name, repr(self.flavor), self.namespace) else: @@ -132,12 +139,12 @@ def get_long_annotated_name(self): def get_name(self): """Return the full name of this node.""" - if self.namespace == '': + if self.namespace == "": return self.name elif self.namespace is None: - return '*.' + self.name + return "*." + self.name else: - return self.namespace + '.' + self.name + return self.namespace + "." + self.name def get_level(self): """Return the level of this node (in terms of nested namespaces). 
@@ -149,7 +156,7 @@ def get_level(self): if self.namespace == "": return 0 else: - return 1 + self.namespace.count('.') + return 1 + self.namespace.count(".") def get_toplevel_namespace(self): """Return the name of the top-level namespace of this node, or "" if none.""" @@ -158,7 +165,7 @@ def get_toplevel_namespace(self): if self.namespace is None: # group all unknowns in one namespace, "*" return "*" - idx = self.namespace.find('.') + idx = self.namespace.find(".") if idx > -1: return self.namespace[0:idx] else: @@ -179,4 +186,4 @@ def get_namespace_label(self): return make_safe_label(self.namespace) def __repr__(self): - return '' % (repr(self.flavor), self.get_name()) + return "" % (repr(self.flavor), self.get_name()) diff --git a/pyan/sphinx.py b/pyan/sphinx.py index ddc31d4..717c07c 100644 --- a/pyan/sphinx.py +++ b/pyan/sphinx.py @@ -28,10 +28,11 @@ from typing import Any from docutils.parsers.rst import directives -from pyan import create_callgraph from sphinx.ext.graphviz import align_spec, figure_wrapper, graphviz from sphinx.util.docutils import SphinxDirective +from pyan import create_callgraph + def direction_spec(argument: Any) -> str: return directives.choice(argument, ("vertical", "horizontal")) diff --git a/pyan/visgraph.py b/pyan/visgraph.py index 9360710..748e67a 100644 --- a/pyan/visgraph.py +++ b/pyan/visgraph.py @@ -2,9 +2,10 @@ # -*- coding: utf-8 -*- """Format-agnostic representation of the output graph.""" -import re -import logging import colorsys +import logging +import re + class Colorizer: """Output graph color manager. @@ -75,10 +76,9 @@ class VisualNode(object): """ A node in the output graph: colors, internal ID, human-readable label, ... 
""" - def __init__( - self, id, label='', flavor='', - fill_color='', text_color='', group=''): - self.id = id # graphing software friendly label (no special chars) + + def __init__(self, id, label="", flavor="", fill_color="", text_color="", group=""): + self.id = id # graphing software friendly label (no special chars) self.label = label # human-friendly label self.flavor = flavor self.fill_color = fill_color @@ -86,15 +86,11 @@ def __init__( self.group = group def __repr__(self): - optionals = [ - repr(s) for s in [ - self.label, self.flavor, - self.fill_color, self.text_color, self.group] if s] + optionals = [repr(s) for s in [self.label, self.flavor, self.fill_color, self.text_color, self.group] if s] if optionals: - return ('VisualNode(' + repr(self.id) + - ', ' + ', '.join(optionals) + ')') + return "VisualNode(" + repr(self.id) + ", " + ", ".join(optionals) + ")" else: - return 'VisualNode(' + repr(self.id) + ')' + return "VisualNode(" + repr(self.id) + ")" class VisualEdge(object): @@ -103,6 +99,7 @@ class VisualEdge(object): flavor is meant to be 'uses' or 'defines' """ + def __init__(self, source, target, flavor, color): self.source = source self.target = target @@ -110,15 +107,11 @@ def __init__(self, source, target, flavor, color): self.color = color def __repr__(self): - return ( - 'Edge(' + self.source.label + ' ' + self.flavor + ' ' + - self.target.label + ')') + return "Edge(" + self.source.label + " " + self.flavor + " " + self.target.label + ")" class VisualGraph(object): - def __init__( - self, id, label, nodes=None, edges=None, subgraphs=None, - grouped=False): + def __init__(self, id, label, nodes=None, edges=None, subgraphs=None, grouped=False): self.id = id self.label = label self.nodes = nodes or [] @@ -128,13 +121,13 @@ def __init__( @classmethod def from_visitor(cls, visitor, options=None, logger=None): - colored = options.get('colored', False) - nested = options.get('nested_groups', False) - grouped_alt = options.get('grouped_alt', 
False) - grouped = nested or options.get('grouped', False) # nested -> grouped - annotated = options.get('annotated', False) - draw_defines = options.get('draw_defines', False) - draw_uses = options.get('draw_uses', False) + colored = options.get("colored", False) + nested = options.get("nested_groups", False) + grouped_alt = options.get("grouped_alt", False) + grouped = nested or options.get("grouped", False) # nested -> grouped + annotated = options.get("annotated", False) + draw_defines = options.get("draw_defines", False) + draw_uses = options.get("draw_uses", False) # Terminology: # - what Node calls "label" is a computer-friendly unique identifier @@ -146,12 +139,18 @@ def from_visitor(cls, visitor, options=None, logger=None): if annotated: if grouped: # group label includes namespace already - def labeler(n): return n.get_annotated_name() + def labeler(n): + return n.get_annotated_name() + else: # the node label is the only place to put the namespace info - def labeler(n): return n.get_long_annotated_name() + def labeler(n): + return n.get_long_annotated_name() + else: - def labeler(n): return n.get_short_name() + + def labeler(n): + return n.get_short_name() logger = logger or logging.getLogger(__name__) @@ -168,34 +167,35 @@ def find_filenames(): for node in visited_nodes: filenames.add(node.filename) return filenames - colorizer = Colorizer(num_colors=len(find_filenames()) + 1, - colored=colored, logger=logger) + + colorizer = Colorizer(num_colors=len(find_filenames()) + 1, colored=colored, logger=logger) nodes_dict = dict() - root_graph = cls('G', label='', grouped=grouped) + root_graph = cls("G", label="", grouped=grouped) subgraph = root_graph namespace_stack = [] - prev_namespace = '' # The namespace '' is first in visited_nodes. + prev_namespace = "" # The namespace '' is first in visited_nodes. 
for node in visited_nodes: - logger.info('Looking at %s' % node.name) + logger.info("Looking at %s" % node.name) # Create the node itself and add it to nodes_dict idx, fill_RGBA, text_RGB = colorizer.make_colors(node) visual_node = VisualNode( - id=node.get_label(), - label=labeler(node), - flavor=repr(node.flavor), - fill_color=fill_RGBA, - text_color=text_RGB, - group=idx) + id=node.get_label(), + label=labeler(node), + flavor=repr(node.flavor), + fill_color=fill_RGBA, + text_color=text_RGB, + group=idx, + ) nodes_dict[node] = visual_node # next namespace? if grouped and node.namespace != prev_namespace: if not prev_namespace: - logger.info('New namespace %s' % (node.namespace)) + logger.info("New namespace %s" % (node.namespace)) else: - logger.info('New namespace %s, old was %s' % (node.namespace, prev_namespace)) + logger.info("New namespace %s, old was %s" % (node.namespace, prev_namespace)) prev_namespace = node.namespace label = node.get_namespace_label() @@ -209,14 +209,11 @@ def find_filenames(): m = re.match(namespace_stack[-1].label, node.namespace) # The '.' check catches siblings in cases like # MeshGenerator vs. Mesh. - while (m is None or - m.end() == len(node.namespace) or - node.namespace[m.end()] != '.'): + while m is None or m.end() == len(node.namespace) or node.namespace[m.end()] != ".": namespace_stack.pop() if not len(namespace_stack): break - m = re.match( - namespace_stack[-1].label, node.namespace) + m = re.match(namespace_stack[-1].label, node.namespace) parentgraph = namespace_stack[-1] if len(namespace_stack) else root_graph parentgraph.subgraphs.append(subgraph) @@ -236,17 +233,12 @@ def find_filenames(): # place closer together those nodes that are linked by a # defines relationship. 
# - color = "#838b8b" if draw_defines else '#ffffff00' + color = "#838b8b" if draw_defines else "#ffffff00" for n in visitor.defines_edges: if n.defined: for n2 in visitor.defines_edges[n]: if n2.defined: - root_graph.edges.append( - VisualEdge( - nodes_dict[n], - nodes_dict[n2], - 'defines', - color)) + root_graph.edges.append(VisualEdge(nodes_dict[n], nodes_dict[n2], "defines", color)) if draw_uses: color = "#000000" @@ -254,11 +246,6 @@ def find_filenames(): if n.defined: for n2 in visitor.uses_edges[n]: if n2.defined: - root_graph.edges.append( - VisualEdge( - nodes_dict[n], - nodes_dict[n2], - 'uses', - color)) + root_graph.edges.append(VisualEdge(nodes_dict[n], nodes_dict[n2], "uses", color)) return root_graph diff --git a/pyan/writers.py b/pyan/writers.py index a48e8ba..5b3335d 100644 --- a/pyan/writers.py +++ b/pyan/writers.py @@ -3,11 +3,12 @@ """Graph markup writers.""" +import io +import logging import os import subprocess import sys -import logging -import io + from jinja2 import Template @@ -17,7 +18,7 @@ def __init__(self, graph, output=None, logger=None, tabstop=4): self.output = output self.logger = logger or logging.getLogger(__name__) self.indent_level = 0 - self.tabstop = tabstop * ' ' + self.tabstop = tabstop * " " def log(self, msg): self.logger.info(msg) @@ -29,15 +30,15 @@ def dedent(self, level=1): self.indent_level -= level def write(self, line): - self.outstream.write(self.tabstop * self.indent_level + line + '\n') + self.outstream.write(self.tabstop * self.indent_level + line + "\n") def run(self): - self.log('%s running' % type(self)) + self.log("%s running" % type(self)) try: if isinstance(self.output, io.StringIO): # write to stream self.outstream = self.output else: - self.outstream = open(self.output, 'w') # write to file + self.outstream = open(self.output, "w") # write to file except TypeError: self.outstream = sys.stdout self.start_graph() @@ -88,100 +89,77 @@ def finish_graph(self): class TgfWriter(Writer): def __init__(self, 
graph, output=None, logger=None): - Writer.__init__( - self, graph, - output=output, - logger=logger) + Writer.__init__(self, graph, output=output, logger=logger) self.i = 1 self.id_map = {} def write_node(self, node): - self.write('%d %s' % (self.i, node.label)) + self.write("%d %s" % (self.i, node.label)) self.id_map[node] = self.i self.i += 1 def start_edges(self): - self.write('#') + self.write("#") def write_edge(self, edge): - flavor = 'U' if edge.flavor == 'uses' else 'D' - self.write( - '%s %s %s' % - (self.id_map[edge.source], self.id_map[edge.target], flavor)) + flavor = "U" if edge.flavor == "uses" else "D" + self.write("%s %s %s" % (self.id_map[edge.source], self.id_map[edge.target], flavor)) class DotWriter(Writer): - def __init__(self, graph, - options=None, output=None, logger=None, tabstop=4): - Writer.__init__( - self, graph, - output=output, - logger=logger, - tabstop=tabstop) + def __init__(self, graph, options=None, output=None, logger=None, tabstop=4): + Writer.__init__(self, graph, output=output, logger=logger, tabstop=tabstop) options = options or [] if graph.grouped: options += ['clusterrank="local"'] - self.options = ', '.join(options) + self.options = ", ".join(options) self.grouped = graph.grouped def start_graph(self): - self.write('digraph G {') - self.write(' graph [' + self.options + '];') + self.write("digraph G {") + self.write(" graph [" + self.options + "];") self.indent() def start_subgraph(self, graph): - self.log('Start subgraph %s' % graph.label) + self.log("Start subgraph %s" % graph.label) # Name must begin with "cluster" to be recognized as a cluster by GraphViz. 
- self.write( - "subgraph cluster_%s {\n" % graph.id) + self.write("subgraph cluster_%s {\n" % graph.id) self.indent() # translucent gray (no hue to avoid visual confusion with any # group of colored nodes) - self.write( - 'graph [style="filled,rounded",' - 'fillcolor="#80808018", label="%s"];' - % graph.label) + self.write('graph [style="filled,rounded",' 'fillcolor="#80808018", label="%s"];' % graph.label) def finish_subgraph(self, graph): - self.log('Finish subgraph %s' % graph.label) + self.log("Finish subgraph %s" % graph.label) # terminate previous subgraph self.dedent() - self.write('}') + self.write("}") def write_node(self, node): - self.log('Write node %s' % node.label) + self.log("Write node %s" % node.label) self.write( '%s [label="%s", style="filled", fillcolor="%s",' - ' fontcolor="%s", group="%s"];' - % ( - node.id, node.label, - node.fill_color, node.text_color, node.group)) + ' fontcolor="%s", group="%s"];' % (node.id, node.label, node.fill_color, node.text_color, node.group) + ) def write_edge(self, edge): source = edge.source target = edge.target color = edge.color - if edge.flavor == 'defines': - self.write( - ' %s -> %s [style="dashed",' - ' color="%s"];' - % (source.id, target.id, color)) + if edge.flavor == "defines": + self.write(' %s -> %s [style="dashed",' ' color="%s"];' % (source.id, target.id, color)) else: # edge.flavor == 'uses': - self.write( - ' %s -> %s [style="solid",' - ' color="%s"];' - % (source.id, target.id, color)) + self.write(' %s -> %s [style="solid",' ' color="%s"];' % (source.id, target.id, color)) def finish_graph(self): - self.write('}') # terminate "digraph G {" + self.write("}") # terminate "digraph G {" class SVGWriter(DotWriter): - def run(self): # write dot file - self.log('%s running' % type(self)) + self.log("%s running" % type(self)) self.outstream = io.StringIO() self.start_graph() self.write_subgraph(self.graph) @@ -190,10 +168,7 @@ def run(self): # convert to svg svg = subprocess.run( - f"dot -Tsvg", - 
shell=True, - stdout=subprocess.PIPE, - input=self.outstream.getvalue().encode() + "dot -Tsvg", shell=True, stdout=subprocess.PIPE, input=self.outstream.getvalue().encode() ).stdout.decode() if self.output: @@ -207,7 +182,6 @@ def run(self): class HTMLWriter(SVGWriter): - def run(self): with io.StringIO() as svg_stream: # run SVGWriter with stream as output @@ -234,11 +208,7 @@ def run(self): class YedWriter(Writer): def __init__(self, graph, output=None, logger=None, tabstop=2): - Writer.__init__( - self, graph, - output=output, - logger=logger, - tabstop=tabstop) + Writer.__init__(self, graph, output=output, logger=logger, tabstop=tabstop) self.grouped = graph.grouped self.indent_level = 0 self.edge_id = 0 @@ -246,18 +216,19 @@ def __init__(self, graph, output=None, logger=None, tabstop=2): def start_graph(self): self.write('') self.write( - '') + '' + ) self.indent() self.write('') self.write('') @@ -265,96 +236,87 @@ def start_graph(self): self.indent() def start_subgraph(self, graph): - self.log('Start subgraph %s' % graph.label) + self.log("Start subgraph %s" % graph.label) self.write('' % graph.id) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('') - self.write('%s' - % graph.label) + self.write( + '%s' % graph.label + ) self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.write('' % graph.id) self.indent() def finish_subgraph(self, graph): - self.log('Finish subgraph %s' % graph.label) + self.log("Finish subgraph %s" % graph.label) self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def write_node(self, node): - self.log('Write node %s' % node.label) + self.log("Write node %s" % node.label) width = 20 + 10 * len(node.label) 
self.write('' % node.id) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() self.write('' % ("30", width)) - self.write('' - % node.fill_color) - self.write('') - self.write('%s' - % node.label) + self.write('' % node.fill_color) + self.write('') + self.write("%s" % node.label) self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def write_edge(self, edge): self.edge_id += 1 source = edge.source target = edge.target - self.write( - '' - % (self.edge_id, source.id, target.id)) + self.write('' % (self.edge_id, source.id, target.id)) self.indent() self.write('') self.indent() - self.write('') + self.write("") self.indent() - if edge.flavor == 'defines': - self.write('' - % edge.color) + if edge.flavor == "defines": + self.write('' % edge.color) else: - self.write('' - % edge.color) + self.write('' % edge.color) self.write('') self.write('') self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") self.dedent() - self.write('') + self.write("") def finish_graph(self): self.dedent(2) - self.write(' ') + self.write(" ") self.dedent() - self.write('') + self.write("") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5c2e34e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[tool.isort] +profile = "black" +honor_noqa = true +line_length = 120 +combine_as_imports = true +force_sort_within_sections = true +known_first_party = "pyan" + +[tool.black] +line-length = 120 +include = '\.pyi?$' +exclude = ''' +/( + \.git + | \.hg + | \.mypy_cache + | \.tox + | \.venv + | _build + | egg-info + | buck-out + | build + | dist + | env +)/ +''' diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3392002 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,24 @@ +[flake8] +max-line-length = 120 +show-source = true +ignore = + E203, # space before : (needed for how 
black formats slicing) + W503, # line break before binary operator + W504, # line break after binary operator + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + E741, # ignore not easy to read variables like i l I etc. + C406, # Unnecessary list literal - rewrite as a dict literal. + C408, # Unnecessary dict call - rewrite as a literal. + C409, # Unnecessary list passed to tuple() - rewrite as a tuple literal. + S001, # found modulo formatter (incorrect picks up mod operations) + F401 # unused imports + W605 # invalid escape sequence (e.g. for LaTeX) +exclude = docs/build/*.py, + node_modules/*.py, + .eggs/*.py, + versioneer.py, + venv/*, + .venv/*, + .git/* + .history/* diff --git a/setup.py b/setup.py index 81f7f93..f6b2aca 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,9 @@ python3 setup.py --help bdist_wheel # or any command """ -import os import ast +import os + from setuptools import setup ######################################################### @@ -32,12 +33,13 @@ # Long description for package homepage on PyPI # DESC = ( - 'Generate approximate call graphs for Python programs.\n' - '\n' - 'Pyan takes one or more Python source files, performs a ' - '(rather superficial) static analysis, and constructs a directed graph of ' - 'the objects in the combined source, and how they define or ' - 'use each other. The graph can be output for rendering by GraphViz or yEd.') + "Generate approximate call graphs for Python programs.\n" + "\n" + "Pyan takes one or more Python source files, performs a " + "(rather superficial) static analysis, and constructs a directed graph of " + "the objects in the combined source, and how they define or " + "use each other. The graph can be output for rendering by GraphViz or yEd." 
+) ######################################################### # Init @@ -49,7 +51,7 @@ # # https://stackoverflow.com/q/2058802/1959808 # -init_py_path = os.path.join('pyan', '__init__.py') +init_py_path = os.path.join("pyan", "__init__.py") version = None try: with open(init_py_path) as f: @@ -78,48 +80,41 @@ author="Juha Jeronen", author_email="juha.m.jeronen@gmail.com", url="https://github.com/Technologicat/pyan", - description=SHORTDESC, long_description=DESC, - license="GPL 2.0", - # free-form text field; # https://stackoverflow.com/q/34994130/1959808 platforms=["Linux"], - # See # https://pypi.python.org/pypi?%3Aaction=list_classifiers # # for the standard classifiers. # - classifiers=["Development Status :: 4 - Beta", - "Environment :: Console", - "Intended Audience :: Developers", - ("License :: OSI Approved :: " - "GNU General Public License v2 (GPLv2)"), - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Topic :: Software Development" - ], - + classifiers=[ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Intended Audience :: Developers", + ("License :: OSI Approved :: " "GNU General Public License v2 (GPLv2)"), + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Topic :: Software Development", + ], # See # http://setuptools.readthedocs.io/en/latest/setuptools.html # setup_requires=["wheel"], install_requires=["jinja2"], provides=["pyan"], - # keywords for PyPI (in case you upload your project) # # e.g. the keywords your project uses as topics on GitHub, # minus "python" (if there) # keywords=["call-graph", "static-code-analysis"], - # Declare packages so that python -m setup build will copy .py files # (especially __init__.py). 
# @@ -127,14 +122,12 @@ # so they must also be declared. # packages=["pyan"], - zip_safe=True, - package_data={'pyan': ["callgraph.html"]}, + package_data={"pyan": ["callgraph.html"]}, include_package_data=True, - entry_points={ - 'console_scripts': [ - 'pyan3 = pyan.main:main', + "console_scripts": [ + "pyan3 = pyan.main:main", ] }, ) diff --git a/tests/old_tests/issue3/testi.py b/tests/old_tests/issue3/testi.py index 5798231..4da7858 100644 --- a/tests/old_tests/issue3/testi.py +++ b/tests/old_tests/issue3/testi.py @@ -1,14 +1,21 @@ # -*- coding: utf-8; -*- # See issue #3 + def f(): return [x for x in range(10)] + def g(): return [(x, y) for x in range(10) for y in range(10)] -def h(): - return [([(name, allargs) for name, _, _, allargs, _ in recs], - {name: inargs for name, inargs, _, _, _ in recs}, - {name: meta for name, _, _, _, meta in recs}) - for recs in (results[key] for key in sorted(results.keys()))] + +# def h(): +# return [ +# ( +# [(name, allargs) for name, _, _, allargs, _ in recs], +# {name: inargs for name, inargs, _, _, _ in recs}, +# {name: meta for name, _, _, _, meta in recs}, +# ) +# for recs in (results[key] for key in sorted(results.keys())) +# ] diff --git a/tests/old_tests/issue5/meas_xrd.py b/tests/old_tests/issue5/meas_xrd.py index 1a4587e..072bcb8 100644 --- a/tests/old_tests/issue5/meas_xrd.py +++ b/tests/old_tests/issue5/meas_xrd.py @@ -3,6 +3,7 @@ import numpy as np import pandas.io.parsers + class MeasXRD: def __init__(self, path: str): if not os.path.isfile(path): @@ -23,11 +24,5 @@ def __init__(self, path: str): line = file.readline() self.data = pandas.io.parsers.read_csv( - path, - skiprows=row_ind, - dtype={ - "Angle": np.float_, - "Intensity": np.int_ - }, - engine="c" + path, skiprows=row_ind, dtype={"Angle": np.float_, "Intensity": np.int_}, engine="c" ) diff --git a/tests/old_tests/issue5/plot_xrd.py b/tests/old_tests/issue5/plot_xrd.py index f2526e5..fc2d408 100644 --- a/tests/old_tests/issue5/plot_xrd.py +++ 
b/tests/old_tests/issue5/plot_xrd.py @@ -1,24 +1,13 @@ -import plotly.offline as py import plotly.graph_objs as go +import plotly.offline as py from . import meas_xrd + def plot_xrd(meas: meas_xrd.MeasXRD): - trace = go.Scatter( - x=meas.data["Angle"], - y=meas.data["Intensity"] - ) + trace = go.Scatter(x=meas.data["Angle"], y=meas.data["Intensity"]) - layout = go.Layout( - title="XRD data", - xaxis=dict( - title="Angle" - ), - yaxis=dict( - title="Intensity", - type="log" - ) - ) + layout = go.Layout(title="XRD data", xaxis=dict(title="Angle"), yaxis=dict(title="Intensity", type="log")) data = [trace] fig = go.Figure(data=data, layout=layout) diff --git a/tests/old_tests/issue5/relimport.py b/tests/old_tests/issue5/relimport.py index 1bf9f9d..9abb7fa 100644 --- a/tests/old_tests/issue5/relimport.py +++ b/tests/old_tests/issue5/relimport.py @@ -1,7 +1,6 @@ # -*- coding: utf-8; -*- # See issue #5 -from .mod2 import foo +from . import mod1, mod1 as moo from ..mod3 import bar -from . import mod1 -from . 
import mod1 as moo +from .mod2 import foo diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 0bc2b2a..8198445 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -1,10 +1,12 @@ -import logging from glob import glob +import logging import os + import pytest from pyan.analyzer import CallGraphVisitor + @pytest.fixture def callgraph(): filenames = glob(os.path.join(os.path.dirname(__file__), "test_code/**/*.py"), recursive=True) @@ -17,6 +19,7 @@ def get_node(nodes, name): assert len(filtered_nodes) == 1, f"Node with name {name} should exist" return filtered_nodes[0] + def get_in_dict(node_dict, name): return node_dict[get_node(node_dict.keys(), name)] diff --git a/tests/test_code/submodule1.py b/tests/test_code/submodule1.py index c7a7d31..d6893a4 100644 --- a/tests/test_code/submodule1.py +++ b/tests/test_code/submodule1.py @@ -1,23 +1,21 @@ -from test_code.subpackage1 import A from test_code import subpackage1 as subpackage +from test_code.subpackage1 import A def test_func1(a): return a + def test_func2(a): return a class B: - def __init__(self, k): self.a = 1 - def to_A(self): return A(self) def get_a_via_A(self): return test_func1(self.to_A().b.a) - diff --git a/tests/test_code/submodule2.py b/tests/test_code/submodule2.py index d5f66ca..76706b5 100644 --- a/tests/test_code/submodule2.py +++ b/tests/test_code/submodule2.py @@ -1,7 +1,9 @@ -from . import submodule1 import test_code.submodule1 as b +from . 
import submodule1 + A = 32 + def test_2(a): - return submodule1.test_func2(a) + A + b.test_func1(a) \ No newline at end of file + return submodule1.test_func2(a) + A + b.test_func1(a) diff --git a/tests/test_code/subpackage1/__init__.py b/tests/test_code/subpackage1/__init__.py index 9b81aef..d213d49 100644 --- a/tests/test_code/subpackage1/__init__.py +++ b/tests/test_code/subpackage1/__init__.py @@ -1,3 +1,3 @@ from test_code.subpackage1.submodule1 import A -__all__ = ["A"] \ No newline at end of file +__all__ = ["A"] diff --git a/tests/test_code/subpackage1/submodule1.py b/tests/test_code/subpackage1/submodule1.py index 10204f5..7798ee2 100644 --- a/tests/test_code/subpackage1/submodule1.py +++ b/tests/test_code/subpackage1/submodule1.py @@ -1,7 +1,6 @@ - from ..submodule2 import test_2 -class A: +class A: def __init__(self, b): - self.b = test_2(b) \ No newline at end of file + self.b = test_2(b) diff --git a/tests/test_code/subpackage2/submodule1.py b/tests/test_code/subpackage2/submodule1.py new file mode 100644 index 0000000..5d2722d --- /dev/null +++ b/tests/test_code/subpackage2/submodule1.py @@ -0,0 +1,2 @@ +def test_func1(): + pass