Why not be the first? Anything tagged with "bug"
and "help wanted" is open to whoever wants to implement it. Install with pip or from source.

    With pip:

    .. code::

        pip install picasso-viz

    From the repository:

    .. code::

        git clone git@github.com:merantix/picasso.git
        cd picasso
        pip install -e .

    Note: you'll need the Tensorflow backend for Keras for these examples to work. Make sure your ``~/.keras/keras.json`` file looks like:

    .. code::

        {
            "backend": "tensorflow",
            "image_dim_ordering": "tf",
            "floatx": "float32",
            "epsilon": 1e-07
        } This assumes you've cloned the repository. First install the required packages:

.. code::

    pip install -e .[docs]

Then build them:

.. code::

    cd docs/
    make html

Then you can open ``_build/html/index.html`` in your browser of choice. * This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.

.. _Cookiecutter: https://github.com/audreyr/cookiecutter
.. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
.. _photograph: https://www.flickr.com/photos/maxbraun/4045020694
.. _`Les Demoiselles d'Avignon`: https://en.wikipedia.org/wiki/Les_Demoiselles_d%27Avignon
.. _Elias: https://github.com/Sylvus
.. _Filippo: https://github.com/scopelf
.. _John: https://github.com/JohnMcSpedon
.. _Josh: https://github.com/jwayne
.. _Rasmus: https://github.com/rrothe
.. _Stefan: https://github.com/knub
.. _`Medium post`: https://medium.com/merantix/picasso-a-free-open-source-visualizer-for-cnns-d8ed3a35cfc5 import sys
import os
import sphinx_rtd_theme

# If extensions (or modules to document with autodoc) are in another
# directory, add these directories to sys.path here. # Add any Sphinx extension module names here, as strings. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to +# some non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built +# documents. +#keep_warnings = False + + +# -- Options for HTML output ------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# Theme options are theme-specific and customize the look and feel of a +# theme further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. 
If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as +# html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the +# top of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon +# of the docs. This file should be a Windows icon file (.ico) being +# 16x16 or 32x32 pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) +# here, relative to this directory. They are copied after the builtin +# static files, so a file named "default.css" will overwrite the builtin +# "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page +# bottom, using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names +# to template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. +# Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. +# Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages +# will contain a tag referring to it. 
The value of this option +# must be the base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'picassodoc' + + +# -- Options for LaTeX output ------------------------------------------ + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + #'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass +# [howto/manual]). +latex_documents = [ + ('index', 'picasso.tex', + u'picasso Documentation', + u'Ryan Henderson', 'manual'), +] + +# The name of an image file (relative to this directory) to place at +# the top of the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings +# are parts, not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output ------------------------------------ + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'picasso', + u'picasso Documentation', + [u'Ryan Henderson'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ---------------------------------------- + +# Grouping the document tree into Texinfo files. 
The portion of the computational graph you're interested in requires no other inputs. + +If you built your model with Keras using a `Sequential`_ model, you should be more or less good to go. If you used Tensorflow, you'll need to manually specify the entry and exit points [#]_. + +You can specify the backend (Tensorflow or Keras) using the ``PICASSO_BACKEND_ML`` setting. The allowed values are ``tensorflow`` or ``keras`` (see :doc:`settings`). + +Your model data +=============== + +You can specify the data directory with the ``PICASSO_DATA_DIR`` setting. This directory should contain the Keras or Tensorflow checkpoint files. If multiple checkpoints are found, the latest one will be used (see example `Keras model code`_). + +Utility functions +================= + +In addition to the graph and weight information of the model itself, you'll need to define a few functions to help the visualization interact with user input, and interpret raw output from your computational graph. These are arbitrary python functions, and their locations can be specified in the :doc:`settings`. + +We'll draw from the `Keras MNIST example`_ for this guide. + +Preprocessor +------------ + +The preprocessor takes images uploaded to the webapp and converts them into arrays that can be used as inputs to your model. The Flask app will haved converted them to `PIL Image`_ objects. + +.. code-block:: python3 + + MNIST_DIM = (28, 28) + + def preprocess(targets): + image_arrays = [] + for target in targets: + im = target.convert('L') + im = im.resize(MNIST_DIM, Image.ANTIALIAS) + arr = np.array(im) + image_arrays.append(arr) + + all_targets = np.array(image_arrays) + return all_targets.reshape(len(all_targets), + MNIST_DIM[0], + MNIST_DIM[1], 1).astype('float32') / 255 + +Specifically, we have to convert an arbitrary input color image to a float array of the input size specified with ``MNIST_DIM``. 
+ +Postprocessor +------------- + +For some visualizations, it's useful to convert a flat representation back into an array with the same shape as the original image. + +.. code-block:: python3 + + def postprocess(output_arr): + images = [] + for row in output_arr: + im_array = row.reshape(MNIST_DIM) + images.append(im_array) + + return images + +This therefore takes an arbitrary array (with the same number of total entries as the image array) and reshapes it back. + +Class Decoder +------------- + +Class probabilities are usually returned in an array. For any visualization where we use classification, it's much nicer to have the class labels available. This method simply attaches the labels to computed probabilities. + +.. code-block:: python3 + + def prob_decode(probability_array, top=5): + results = [] + for row in probability_array: + entries = [] + for i, prob in enumerate(row): + entries.append({'index': i, + 'name': str(i), + 'prob': prob}) + + entries = sorted(entries, + key=itemgetter('prob'), + reverse=True)[:top] + + for entry in entries: + entry['prob'] = '{:.3f}'.format(entry['prob']) + results.append(entries) + + return results + +``results`` is then a list of dicts in the format ``[{'index': class_index, 'name': class_name, 'prob': class_probability}, ...]``. In the case of the MNIST dataset, the index is the same as the class name (digits 0-9). + +.. _examples: https://github.com/merantix/picasso/tree/master/picasso/examples + +.. _MNIST: http://yann.lecun.com/exdb/mnist/ + +.. _VGG16: http://www.robots.ox.ac.uk/~vgg/research/very_deep/ + +.. _Sequential: https://keras.io/models/sequential/ + +.. _Keras model code: https://github.com/merantix/picasso/blob/master/picasso/ml_frameworks/keras/model.py + +.. _Keras MNIST example: https://github.com/merantix/picasso/blob/master/picasso/examples/keras/util.py + +.. _PIL Image: http://pillow.readthedocs.io/en/latest/reference/Image.html + +.. .. 
[#] We hope to remove these limitations in the future to accommodate a wider variety of possible graph topologies while still maintaining separation between the visualization and model implementation as much as possible. Install with pip or from source.

    With pip:

    .. code::

        pip install picasso-viz

    From the repository:

    .. code::

        git clone git@github.com:merantix/picasso.git
        cd picasso
        pip install -e .

    Note: you'll need the Tensorflow backend for Keras for these examples to work. Make sure your ``~/.keras/keras.json`` file looks like:

    .. code::

        {
            "backend": "tensorflow",
            "image_dim_ordering": "tf",
            "floatx": "float32",
            "epsilon": 1e-07
        } By default, the visualizer starts a Keras MNIST example. We've also included a Keras VGG16 example. If this script fails, you might be behind a proxy. Start Flask with ``flask run``. If it worked, the "Current checkpoint" label should have changed on the landing page. I haven't tried this yet though, so an extra config parameter may be needed. Let's look at the Tensorflow MNIST example.

.. code-block:: bash

    export PICASSO_SETTINGS=/absolute/path/to/repo/picasso/picasso/examples/tensorflow/config.py

This tells the app to use this configuration instead of the default one. Inside
``config.py``, we have:

.. code-block:: python3

    import os

    base_dir = os.path.split(os.path.abspath(__file__))[0]

    BACKEND_ML = 'tensorflow'
    BACKEND_PREPROCESSOR_NAME = 'preprocess'
    BACKEND_PREPROCESSOR_PATH = os.path.join(base_dir, 'util.py')
    BACKEND_POSTPROCESSOR_NAME = 'postprocess'
    BACKEND_POSTPROCESSOR_PATH = os.path.join(base_dir, 'util.py')
    BACKEND_PROB_DECODER_NAME = 'prob_decode'
    BACKEND_PROB_DECODER_PATH = os.path.join(base_dir, 'util.py')
    DATA_DIR = os.path.join(base_dir, 'data-volume')

Any variable with a lowercase name (such as ``base_dir``) is ignored for the
purposes of determining a setting. Settings can also be set via environment
variables, but you must prepend the app name. For instance
``BACKEND_ML = 'tensorflow'`` would become ``export
PICASSO_BACKEND_ML=tensorflow``.

For explanations of each setting, see :mod:`picasso.settings`. Any
additional settings starting with ``BACKEND_`` will be sent to the model backend
as a keyword argument. The input and output tensor names can be passed to the
Tensorflow backend in this way:

.. code-block:: python3

    ...
    BACKEND_TF_PREDICT_VAR='Softmax:0'
    BACKEND_TF_INPUT_VAR='convolution2d_input_1:0'

.. _managed by Flask: http://flask.pocoo.org/docs/latest/config/ This tutorial will show you how to make a new visualization from scratch. Our visualization will be based on the very simple :class:`~picasso.visualizations.class_probabilities.ClassProbabilities` (see `ClassProbabilities`_ code) visualization, along with its HTML `template`_. It's important that the class name and HTML template name are the same. You must implement the ``__init__`` method, and it should accept one argument, ``model``. ``model`` will be an instance of a child class of `Model`_, which provides an interface to the machine learning backend. You can also add a description which will display on the landing page. Let's fix that. It's just going to compute the class probabilities and pass them back along to the web app. So we'll add: + +.. code-block:: python3 + :emphasize-lines: 11-21 + + from picasso.visualizations import BaseVisualization + + + class FunViz(BaseVisualization): + + def __init__(self, model): + self.description = 'A fun visualization!' + self.model = model + + def make_visualization(self, inputs, output_dir, settings=None): + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + predictions = self.model.sess.run(self.model.tf_predict_var, + feed_dict={self.model.tf_input_var: + pre_processed_arrays}) + filtered_predictions = self.model.decode_prob(predictions) + results = [] + for i, inp in enumerate(inputs): + results.append({'input_file_name': inp['filename'], + 'predict_probs': filtered_predictions[i]}) + return results + +Let's go line by line: + +.. code-block:: python3 + :emphasize-lines: 7,8 + + ... + + class FunViz(BaseVisualization): + ... + + def make_visualization(self, inputs, output_dir, settings=None): + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + ... + +``inputs`` are sent to the visualization class as a list of ``{'filename': ... , 'data': ...}`` dictionaries. 
The data are `PIL Images`_ created from raw data that the user has uploaded to the webapp. The ``preprocess`` method of ``model`` simply turns the input images into appropriately-sized arrays for the input of whichever computational graph you are using. Therefore, ``pre_processed_arrays`` is an array with the first dimension equal to the number of inputs, and subsequent dimensions determined by the ``preprocess`` function. + +.. code-block:: python3 + :emphasize-lines: 9-11 + + ... + + class FunViz(BaseVisualization): + ... + + def make_visualization(self, inputs, output_dir, settings=None): + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + predictions = self.model.sess.run(self.model.tf_predict_var, + feed_dict={self.model.tf_input_var: + pre_processed_arrays}) + ... + +Here's where we actually do some computation to be used in the visualization. Note that the ``model`` object exposes the Tensorflow session (regardless of if the backend is Keras or Tensorflow). We also store the input and output tensors with the ``model`` members ``tf_input_var`` and ``tf_predict_var`` respectively. Thus this is just a standard Tensorflow run which will return an array of dimension ``n x c`` where ``n`` is the number of inputs, and ``c`` is the number of classes. + +.. code-block:: python3 + :emphasize-lines: 12 + + ... + + class FunViz(BaseVisualization): + ... + + def make_visualization(self, inputs, output_dir, settings=None): + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + predictions = self.model.sess.run(self.model.tf_predict_var, + feed_dict={self.model.tf_input_var: + pre_processed_arrays}) + filtered_predictions = self.model.decode_prob(predictions) + ... + +``decode_prob`` is another model-specific method. It gives us back the class labels from the ``predictions`` array. 
The format will be list of dictionaries in the format ``[{'index': class_index, 'name': class_name, 'prob': class_probability}, ...]``. It will also only return the top class predictions (this comes in handy when using models like VGG16, which has 1000 classes). + +.. code-block:: python3 + :emphasize-lines: 13-17 + + ... + + class FunViz(BaseVisualization): + ... + + def make_visualization(self, inputs, output_dir, settings=None): + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + predictions = self.model.sess.run(self.model.tf_predict_var, + feed_dict={self.model.tf_input_var: + pre_processed_arrays}) + filtered_predictions = self.model.decode_prob(predictions) + results = [] + for i, inp in enumerate(inputs): + results.append({'input_file_name': inp['filename'], + 'predict_probs': filtered_predictions[i]}) + return results + +Here we arrange the results to pass back to the webapp. In our case, we just return a list of dictionaries which hold the original filename, and the formatted prediction results. The exact structure isn't so important, but you'll have to deal with it when you write your HTML template, so try to keep it manageable. Now you'll be able to see your result page from earlier. + +.. figure:: _static/result_nohtml.png + :align: center + + At least it's fast, right? + +Of course, we haven't told the template how to display the results yet. Let's get down to it. + +Configure the HTML template +=========================== + +We need to specify how to layout our visualization. Here are the lines we'll add: + +.. code-block:: jinja + :emphasize-lines: 3-20 + + {% extends "result.html" %} + {% block vis %} + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+ {% endblock %} + +Let's look at the pieces separately again: + +.. code-block:: jinja + :emphasize-lines: 3,4,19,20 + + {% extends "result.html" %} + {% block vis %} + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+ {% endblock %} + +Every visualization gets a ``results`` object from the web app. The ``results`` object will have the exact same structure as the return value of the ``make_visualization`` method of your visualization class. Since we returned a list, we iterate over it with this for-loop to generate the rows of the table. + +.. code-block:: jinja + :emphasize-lines: 5,10,11,18 + + {% extends "result.html" %} + {% block vis %} + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+ {% endblock %} + +There are actually two rows per result. One with the filename and class labels, and one with the input image and class probabilities. Let's look at each in turn. + +.. code-block:: jinja + :emphasize-lines: 6-9 + + {% extends "result.html" %} + {% block vis %} + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+ {% endblock %} + +The first column has the filename and the class name headers. The for-loop loops over the ``result.predict_prob`` list of predictions (which we generated in ``make_visualization``) and puts each class header in a cell. + +.. code-block:: jinja + :emphasize-lines: 12-17 + + {% extends "result.html" %} + {% block vis %} + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+ {% endblock %} + +The second row contains the input image and the actual numerical probabilities. Note the ``inputs/`` in the ``img`` tag. All input images are stored here by the web app. + +.. figure:: _static/basic_vis.png + :align: center + + Sooo beautiful ⊂◉‿◉つ + +Similarly, there is an ``outputs/`` folder (not shown in this example). Its path is passed to the visualization class as ``output_dir``. Anything the visualization stores there is also available to the template (for example, additional images needed for the visualization). + +Add some settings +================= + +Maybe we'd like the user to be able to limit the number of classes shown. We can easily do this by adding a ``settings`` property to the ``FunViz`` class. + +.. code-block:: python3 + :emphasize-lines: 5, 21 + + from picasso.visualizations import BaseVisualization + + + class FunViz(BaseVisualization): + settings = {'Display': ['1', '2', '3']} + + def __init__(self, model): + self.description = 'A fun visualization!' + self.model = model + + def make_visualization(self, inputs, output_dir, settings=None): + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + predictions = self.model.sess.run(self.model.tf_predict_var, + feed_dict={self.model.tf_input_var: + pre_processed_arrays}) + filtered_predictions = self.model.decode_prob(predictions) + results = [] + for i, inp in enumerate(inputs): + results.append({'input_file_name': inp['filename'], + 'predict_probs': filtered_predictions[i][:int(settings['Display'])]}) + return results + +A page to select the settings will automatically be generated. + +.. figure:: _static/setting.png + :align: center + + The automatically generated settings page + +.. figure:: _static/with_settings.png + :align: center + + It works! ヽ(^◇^*)/ + +Add some styling +================ + +The template that ``FunViz.html`` derives from imports `Bootstrap`_, so you can add some fancier styling if you like! + +.. 
code-block:: jinja + + {% extends "result.html" %} + {% block vis %} + + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} + +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+ {% endblock %} + +Further Reading +=============== + +For more complex visualizations, see the examples in `the visualizations module`_. + +.. _ClassProbabilities: https://github.com/merantix/picasso/blob/master/picasso/visualizations/class_probabilities.py + +.. _template: https://github.com/merantix/picasso/blob/master/picasso/templates/ClassProbabilities.html + +.. _BaseVisualization: https://github.com/merantix/picasso/blob/master/picasso/visualizations/__init__.py + +.. _Model: https://github.com/merantix/picasso/blob/master/picasso/ml_frameworks/model.py + +.. _Flask: http://flask.pocoo.org/ + +.. _Jinja2: http://jinja.pocoo.org/docs/ + +.. _PIL Images: http://pillow.readthedocs.io/en/latest/reference/Image.html + +.. _Bootstrap: http://getbootstrap.com/ + +.. _the visualizations module: https://github.com/merantix/picasso/blob/master/picasso/visualizations/ diff --git a/picasso/__init__.py b/picasso/__init__.py new file mode 100644 index 0000000..32598a7 --- /dev/null +++ b/picasso/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +__author__ = """Ryan Henderson""" +__email__ = 'ryan@merantix.com' +__version__ = 'v0.1.1' + +from flask import Flask +import os +import sys + +if sys.version_info.major < 3 or (sys.version_info.major == 3 and + sys.version_info.minor < 5): + raise SystemError('Python 3.5+ required, found {}'.format(sys.version)) + +app = Flask(__name__) +app.config.from_object('picasso.settings.Default') + +if os.getenv('PICASSO_SETTINGS'): + app.config.from_envvar('PICASSO_SETTINGS') + +import picasso.picasso diff --git a/picasso/examples/keras-vgg16/config.py b/picasso/examples/keras-vgg16/config.py new file mode 100644 index 0000000..7ab3fa5 --- /dev/null +++ b/picasso/examples/keras-vgg16/config.py @@ -0,0 +1,12 @@ +import os + +base_dir = os.path.dirname(os.path.abspath(__file__)) + +BACKEND_ML = 'keras' +BACKEND_PREPROCESSOR_NAME = 'preprocess' +BACKEND_PREPROCESSOR_PATH = os.path.join(base_dir, 'util.py') 
+BACKEND_POSTPROCESSOR_NAME = 'postprocess' +BACKEND_POSTPROCESSOR_PATH = os.path.join(base_dir, 'util.py') +BACKEND_PROB_DECODER_NAME = 'prob_decode' +BACKEND_PROB_DECODER_PATH = os.path.join(base_dir, 'util.py') +DATA_DIR = os.path.join(base_dir, 'data-volume') diff --git a/picasso/examples/keras-vgg16/prepare_model.py b/picasso/examples/keras-vgg16/prepare_model.py new file mode 100644 index 0000000..c5815f0 --- /dev/null +++ b/picasso/examples/keras-vgg16/prepare_model.py @@ -0,0 +1,29 @@ +import os +import json +from keras.applications.vgg16 import VGG16 + +path = 'data-volume' +try: + os.mkdir(path) +except FileExistsError: + pass + +print('Downloading and setting up VGG16...') + +vgg16 = VGG16() + +print('Saving...') + +if not os.path.exists(os.path.join(os.path.dirname(__file__), path)): + os.makedirs(os.path.join(os.path.dirname(__file__), path)) + +with open(os.path.join(os.path.dirname(__file__), + path, + 'vgg16.json'), 'w') as json_file: + json.dump(vgg16.to_json(), json_file) + +vgg16.save_weights(os.path.join(os.path.dirname(__file__), + path, + 'vgg16.hdf5')) + +print('Done.') diff --git a/picasso/examples/keras-vgg16/util.py b/picasso/examples/keras-vgg16/util.py new file mode 100644 index 0000000..e56eaa0 --- /dev/null +++ b/picasso/examples/keras-vgg16/util.py @@ -0,0 +1,53 @@ +from keras.applications.imagenet_utils import (decode_predictions, + preprocess_input) +import keras.applications.imagenet_utils +from PIL import Image +import numpy as np + +VGG16_DIM = (224, 224, 3) + + +def preprocess(targets): + image_arrays = [] + for target in targets: + im = target.resize(VGG16_DIM[:2], Image.ANTIALIAS) + im = im.convert('RGB') + arr = np.array(im).astype('float32') + image_arrays.append(arr) + + all_targets = np.array(image_arrays) + return preprocess_input(all_targets) + + +def postprocess(output_arr): + images = [] + for row in output_arr: + im_array = row.reshape(VGG16_DIM[:2]) + images.append(im_array) + + return images + + +def 
prob_decode(probability_array, top=5): + r = decode_predictions(probability_array, top=top) + results = [ + [{'code': entry[0], + 'name': entry[1], + 'prob': '{:.3f}'.format(entry[2])} + for entry in row] + for row in r + ] + classes = keras.applications.imagenet_utils.CLASS_INDEX + class_keys = list(classes.keys()) + class_values = list(classes.values()) + + for result in results: + for entry in result: + entry.update( + {'index': + int( + class_keys[class_values.index([entry['code'], + entry['name']])] + )} + ) + return results diff --git a/picasso/examples/keras/config.py b/picasso/examples/keras/config.py new file mode 100644 index 0000000..0ecdf76 --- /dev/null +++ b/picasso/examples/keras/config.py @@ -0,0 +1,19 @@ +# Note: this settings file duplicates the default settings in the top-level +# file `settings.py`. b/picasso/examples/keras/data-volume/model.json new file mode 100644 index 0000000..394e41e --- /dev/null +++ b/picasso/examples/keras/data-volume/model.json @@ -0,0 +1 @@ +"{\"class_name\": \"Sequential\", \"keras_version\": \"1.2.1\", \"config\": [{\"class_name\": \"Convolution2D\", \"config\": {\"b_regularizer\": null, \"W_constraint\": null, \"b_constraint\": null, \"name\": \"images\", \"activity_regularizer\": null, \"trainable\": true, \"dim_ordering\": \"tf\", \"nb_col\": 3, \"subsample\": [1, 1], \"init\": \"glorot_uniform\", \"bias\": true, \"nb_filter\": 32, \"input_dtype\": \"float32\", \"border_mode\": \"valid\", \"batch_input_shape\": [null, 28, 28, 1], \"W_regularizer\": null, \"activation\": \"linear\", \"nb_row\": 3}}, {\"class_name\": \"Activation\", \"config\": {\"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_1\"}}, {\"class_name\": \"Convolution2D\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"convolution2d_1\", \"activity_regularizer\": null, \"trainable\": true, \"dim_ordering\": \"tf\", \"nb_col\": 3, \"subsample\": [1, 1], \"init\": \"glorot_uniform\", \"bias\": true, 
\"nb_filter\": 32, \"border_mode\": \"valid\", \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"nb_row\": 3}}, {\"class_name\": \"Activation\", \"config\": {\"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_2\"}}, {\"class_name\": \"MaxPooling2D\", \"config\": {\"name\": \"maxpooling2d_1\", \"trainable\": true, \"dim_ordering\": \"tf\", \"pool_size\": [2, 2], \"strides\": [2, 2], \"border_mode\": \"valid\"}}, {\"class_name\": \"Dropout\", \"config\": {\"p\": 0.25, \"trainable\": true, \"name\": \"dropout_1\"}}, {\"class_name\": \"Flatten\", \"config\": {\"trainable\": true, \"name\": \"flatten_1\"}}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"dense_1\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": 4608, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 128}}, {\"class_name\": \"Activation\", \"config\": {\"activation\": \"relu\", \"trainable\": true, \"name\": \"activation_3\"}}, {\"class_name\": \"Dropout\", \"config\": {\"p\": 0.5, \"trainable\": true, \"name\": \"dropout_2\"}}, {\"class_name\": \"Dense\", \"config\": {\"W_constraint\": null, \"b_constraint\": null, \"name\": \"logits\", \"activity_regularizer\": null, \"trainable\": true, \"init\": \"glorot_uniform\", \"bias\": true, \"input_dim\": 128, \"b_regularizer\": null, \"W_regularizer\": null, \"activation\": \"linear\", \"output_dim\": 10}}, {\"class_name\": \"Activation\", \"config\": {\"activation\": \"softmax\", \"trainable\": true, \"name\": \"activation_4\"}}]}" \ No newline at end of file diff --git a/picasso/examples/keras/util.py b/picasso/examples/keras/util.py new file mode 100644 index 0000000..6f6202e --- /dev/null +++ b/picasso/examples/keras/util.py @@ -0,0 +1,90 @@ +from PIL import Image +from operator import itemgetter +import numpy as np + +MNIST_DIM = (28, 28) + + 
+def preprocess(targets): + """Turn images into computation inputs + + Converts an iterable of PIL Images into a suitably-sized numpy array which + can be used as an input to the evaluation portion of the Keras/tensorflow + graph. + + Args: + targets (list of Images): a list of PIL Image objects + + Returns: + array (float32) + + """ + image_arrays = [] + for target in targets: + im = target.convert('L') + im = im.resize(MNIST_DIM, Image.ANTIALIAS) + arr = np.array(im) + image_arrays.append(arr) + + all_targets = np.array(image_arrays) + return all_targets.reshape(len(all_targets), + MNIST_DIM[0], + MNIST_DIM[1], 1).astype('float32') / 255 + + +def postprocess(output_arr): + """Reshape arrays to original image dimensions + + Typically used for outputs or computations on intermediate layers which + make sense to represent as an image in the original dimension of the input + images (see ``SaliencyMaps``). + + Args: + output_arr (array of float32): Array of leading dimension n containing + n arrays to be reshaped + + Returns: + reshaped array + + """ + images = [] + for row in output_arr: + im_array = row.reshape(MNIST_DIM) + images.append(im_array) + + return images + + +def prob_decode(probability_array, top=5): + """Provide class information from output probabilities + + Gives the visualization additional context for the computed class + probabilities. + + Args: + probability_array (array): class probabilities + top (int): number of class entries to return. Useful for limiting + output in models with many classes. Defaults to 5. + + Returns: + result list of dict in the format [{'index': class_index, 'name': + class_name, 'prob': class_probability}, ...] 
+ + """ + results = [] + for row in probability_array: + entries = [] + for i, prob in enumerate(row): + entries.append({'index': i, + 'name': str(i), + 'prob': prob}) + + entries = sorted(entries, + key=itemgetter('prob'), + reverse=True)[:top] + + for entry in entries: + entry['prob'] = '{:.3f}'.format(entry['prob']) + results.append(entries) + + return results diff --git a/picasso/examples/tensorflow/config.py b/picasso/examples/tensorflow/config.py new file mode 100644 index 0000000..60de7b0 --- /dev/null +++ b/picasso/examples/tensorflow/config.py @@ -0,0 +1,14 @@ +import os + +base_dir = os.path.dirname(os.path.abspath(__file__)) + +BACKEND_ML = 'tensorflow' +BACKEND_PREPROCESSOR_NAME = 'preprocess' +BACKEND_PREPROCESSOR_PATH = os.path.join(base_dir, 'util.py') +BACKEND_POSTPROCESSOR_NAME = 'postprocess' +BACKEND_POSTPROCESSOR_PATH = os.path.join(base_dir, 'util.py') +BACKEND_PROB_DECODER_NAME = 'prob_decode' +BACKEND_PROB_DECODER_PATH = os.path.join(base_dir, 'util.py') +BACKEND_TF_PREDICT_VAR = 'Softmax:0' +BACKEND_TF_INPUT_VAR = 'convolution2d_input_1:0' +DATA_DIR = os.path.join(base_dir, 'data-volume') diff --git a/picasso/examples/tensorflow/data-volume/checkpoint b/picasso/examples/tensorflow/data-volume/checkpoint new file mode 100644 index 0000000..e37193c --- /dev/null +++ b/picasso/examples/tensorflow/data-volume/checkpoint @@ -0,0 +1,2 @@ +model_checkpoint_path: "/tmp/convolutional.ckpt" +all_model_checkpoint_paths: "/tmp/convolutional.ckpt" diff --git a/picasso/examples/tensorflow/data-volume/convolutional.ckpt.data-00000-of-00001 b/picasso/examples/tensorflow/data-volume/convolutional.ckpt.data-00000-of-00001 new file mode 100644 index 0000000..ca24b6b Binary files /dev/null and b/picasso/examples/tensorflow/data-volume/convolutional.ckpt.data-00000-of-00001 differ diff --git a/picasso/examples/tensorflow/data-volume/convolutional.ckpt.index b/picasso/examples/tensorflow/data-volume/convolutional.ckpt.index new file mode 100644 index 
0000000..a7ec312 Binary files /dev/null and b/picasso/examples/tensorflow/data-volume/convolutional.ckpt.index differ diff --git a/picasso/examples/tensorflow/data-volume/convolutional.ckpt.meta b/picasso/examples/tensorflow/data-volume/convolutional.ckpt.meta new file mode 100644 index 0000000..31ae2a4 Binary files /dev/null and b/picasso/examples/tensorflow/data-volume/convolutional.ckpt.meta differ diff --git a/picasso/examples/tensorflow/util.py b/picasso/examples/tensorflow/util.py new file mode 100644 index 0000000..1820184 --- /dev/null +++ b/picasso/examples/tensorflow/util.py @@ -0,0 +1,48 @@ +from PIL import Image +from operator import itemgetter +import numpy as np + +MNIST_DIM = (28, 28) + + +def preprocess(targets): + image_arrays = [] + for target in targets: + im = target.convert('L') + im = im.resize(MNIST_DIM, Image.ANTIALIAS) + arr = np.array(im) + image_arrays.append(arr) + + all_targets = np.array(image_arrays) + return all_targets.reshape(len(all_targets), + MNIST_DIM[0], + MNIST_DIM[1], 1).astype('float32') / 255 + + +def postprocess(output_arr): + images = [] + for row in output_arr: + im_array = row.reshape(MNIST_DIM) + images.append(im_array) + + return images + + +def prob_decode(probability_array, top=5): + results = [] + for row in probability_array: + entries = [] + for i, prob in enumerate(row): + entries.append({'index': i, + 'name': str(i), + 'prob': prob}) + + entries = sorted(entries, + key=itemgetter('prob'), + reverse=True)[:top] + + for entry in entries: + entry['prob'] = '{:.3f}'.format(entry['prob']) + results.append(entries) + + return results diff --git a/picasso/ml_frameworks/__init__.py b/picasso/ml_frameworks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/picasso/ml_frameworks/keras/__init__.py b/picasso/ml_frameworks/keras/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/picasso/ml_frameworks/keras/model.py b/picasso/ml_frameworks/keras/model.py new file mode 100644 index 
0000000..5b6715c --- /dev/null +++ b/picasso/ml_frameworks/keras/model.py @@ -0,0 +1,72 @@ +import os +import glob +import json +from datetime import datetime + +import keras.backend as K +from keras.models import model_from_json + +from picasso.ml_frameworks.tensorflow.model import TFModel + + +class KerasModel(TFModel): + """Implements model loading functions for Keras + + Using this Keras module will require the h5py library, + which is not included with Keras + + Attributes: + sess (Tensorflow :obj:`Session`): underlying Tensorflow session of + the Keras model. + tf_predict_var (:obj:`Tensor`): tensorflow tensor which represents + the class probabilities + tf_input_var (:obj:`Tensor`): tensorflow tensor which represents + the inputs + + """ + + def load(self, data_dir='./'): + """Load graph and weight data + + Args: + data_dir (:obj:`str`): location of Keras checkpoint (`.hdf5`) files + and model (in `.json`) structure. The default behavior + is to take the latest of each, by OS timestamp. 
+ + """ + + # find newest ckpt and graph files + try: + latest_ckpt = max(glob.iglob( + os.path.join(data_dir, '*.h*5')), + key=os.path.getctime) + + self.latest_ckpt_name = os.path.basename(latest_ckpt) + self.latest_ckpt_time = str(datetime.fromtimestamp( + os.path.getmtime(latest_ckpt)) + ) + + except ValueError: + raise FileNotFoundError('No checkpoint (.hdf5 or .h5) files ' + 'available at {}'.format(data_dir)) + try: + latest_json = max(glob.iglob(os.path.join(data_dir, '*.json')), + key=os.path.getctime) + except ValueError: + raise FileNotFoundError('No graph (.json) files ' + 'available at {}'.format(data_dir)) + + # for tensorflow compatibility + K.set_learning_phase(0) + with open(latest_json, 'r') as f: + model_json = json.loads(f.read()) + self.model = model_from_json(model_json) + + self.model.load_weights(latest_ckpt) + self.sess = K.get_session() + + self.tf_predict_var = self.model.outputs[0] + self.tf_input_var = self.model.inputs[0] + + def _predict(self, input_array): + return self.model.predict(input_array) diff --git a/picasso/ml_frameworks/model.py b/picasso/ml_frameworks/model.py new file mode 100644 index 0000000..4d687b1 --- /dev/null +++ b/picasso/ml_frameworks/model.py @@ -0,0 +1,237 @@ +import importlib.util +import warnings +from importlib import import_module +from operator import itemgetter + +ML_LIBRARIES = { + 'tensorflow': + 'picasso.ml_frameworks.tensorflow.model.TFModel', + 'keras': + 'picasso.ml_frameworks.keras.model.KerasModel' +} + + +class Model: + """Model class interface. + + All ML frameworks should derive from this class for the purposes of + the visualization. This class loads saved files generated by various + ML frameworks and allows us to extract the graph topology, weights, etc. 
+ + """ + + def __init__(self, + preprocessor_name='preprocess', + preprocessor_path=None, + postprocessor_name='postprocess', + postprocessor_path=None, + prob_decoder_name='prob_decode', + prob_decoder_path=None, + top_probs=5, + **kwargs): + """Attempt to load utilities + + The class constructor attempts to import a preprocessor, postprocessor, + and probability decoder if a path is supplied. + + Args: + preprocessor_name (str, optional): the name of the preprocessing + function. Defaults to 'preprocess'. + preprocessor_path (str, optional): the absolute path to the file + containing the function named above. If `None`, then do not + try to load a preprocessor. Defaults to `None`. + postprocessor_name (str, optional): the name of the postprocessing + function. Defaults to 'postprocess'. + postprocessor_path (str, optional): the absolute path to the file + containing the function named above. If `None`, then do not + try to load a postprocessor. Defaults to `None`. + prob_decoder_name (str, optional): the name of the probability decoding + function. Defaults to 'prob_decode'. + prob_decoder_path (str, optional): the absolute path to the file + containing the function named above. If `None`, then do not + try to load a prob_decoder. Defaults to `None`. + top_probs (int): Number of classes to display per result. For + instance, VGG16 has 1000 classes, we don't want to display a + visualization for every single possibility. Defaults to 5. + **kwargs: Arbitrary keyword arguments, useful for passing specific + settings to derived classes. 
+ + Example: + If you define a function called "preprocess" at "/path/to/util.py", + then try:: + + preprocessor_name='preprocess', + preprocessor_path='/path/to/util.py' + + """ + self.latest_ckpt_name = None + self.latest_ckpt_time = None + self.top_probs = top_probs + + self.preprocessor_name = preprocessor_name + self.preprocessor_path = preprocessor_path + self.postprocessor_name = postprocessor_name + self.postprocessor_path = postprocessor_path + self.prob_decoder_name = prob_decoder_name + self.prob_decoder_path = prob_decoder_path + + for util in ('preprocessor', 'postprocessor', 'prob_decoder'): + if getattr(self, '{}_path'.format(util)): + spec = importlib.util.\ + spec_from_file_location( + getattr(self, '{}_name'.format(util)), + getattr(self, '{}_path'.format(util))) + setattr(self, util, importlib.util.module_from_spec(spec)) + spec.loader.exec_module(getattr(self, util)) + + if kwargs: + for key, value in kwargs.items(): + setattr(self, key, value) + + def load(self, data_dir, **kwargs): + """Load the model in the desired framework + + Given a directory containing model data (weights and graph + structure), should be able to restore the model locally to the point + where it can be evaluated. + + Args: + data_dir (:obj:`str`): full path to directory containing + weight and graph data + **kwargs: Arbitrary keyword arguments, useful for passing specific + settings to derived classes. + + """ + raise NotImplementedError + + def _predict(self, targets): + """Evaluate new examples and return class probabilities + + Given an iterable of examples or numpy array where the first + dimension is the number of examples, return a n_examples x + n_classes array of class predictions + + Args: + targets: iterable of arrays suitable for input into graph + + Returns: + array of class probabilities + + """ + raise NotImplementedError + + def predict(self, raw_targets): + """Predict from raw data + + Takes an iterable of data in its raw format. 
Passes to the + preprocessor and then the child class _predict. + + Args: + raw_targets (:obj:`list` of :obj:`PIL.Image`): the images + to be processed + + Returns: + array of class probabilities + + """ + return self._predict(self.preprocess(raw_targets)) + + def preprocess(self, raw_targets): + """Preprocess raw input for evaluation by model + + Usually, input will need some preprocessing before submission + to a computation graph. For instance, the raw image may need + to be converted to a numpy array of appropriate dimension + + Args: + raw_targets (:obj:`list` of :obj:`PIL.Image`): the images + to be processed + + Returns: + iterable of arrays of the correct shape for input into graph + + """ + try: + return getattr(self.preprocessor, + self.preprocessor_name)(raw_targets) + except AttributeError: + warnings.warn('Evaluating without preprocessor') + return raw_targets + + def postprocess(self, output_arr): + """Postprocess prediction results back into images + + Sometimes it's useful to display an intermediate computation + as an image. This is model-dependent. + + Args: + output_arr (iterable of arrays): any array with the + same total number of entries as an input array + + Returns: + iterable of arrays in original image shape + + """ + + try: + return getattr(self.postprocessor, + self.postprocessor_name)(output_arr) + except AttributeError: + warnings.warn('Evaluating without postprocessor') + return output_arr + + def decode_prob(self, output_arr): + """Label class probabilities with class names + + Args: + output_arr (array): class probabilities + + Returns: + result list of dict in the format [{'index': class_index, 'name': + class_name, 'prob': class_probability}, ...] 
+ + """ + + try: + return getattr(self.prob_decoder, + self.prob_decoder_name)(output_arr, + top=self.top_probs) + except AttributeError: + warnings.warn('Evaluating without class decoder') + results = [] + for row in output_arr: + entries = [] + for i, prob in enumerate(row): + entries.append({'index': i, + 'name': str(i), + 'prob': prob}) + + entries = sorted(entries, + key=itemgetter('prob'), + reverse=True)[:self.top_probs] + + for entry in entries: + entry['prob'] = '{:.3f}'.format(entry['prob']) + results.append(entries) + return results + + +def generate_model(backend_ml, **kwargs): + """Create a new instance of ML backend + + Args: + backend_ml (:obj:`str`): name of the backend to use + **kwargs: Arbitrary keyword arguments + + Returns: + An instance of :class:`.ml_frameworks.model.Model` + + """ + module_name, _, class_name = \ + ML_LIBRARIES[backend_ml].rpartition('.') + + cls = getattr(import_module(module_name), class_name) + + kwargs = {k.partition('_')[-1]: + v for (k, v) in kwargs.items()} + return cls(**kwargs) diff --git a/picasso/ml_frameworks/tensorflow/__init__.py b/picasso/ml_frameworks/tensorflow/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/picasso/ml_frameworks/tensorflow/model.py b/picasso/ml_frameworks/tensorflow/model.py new file mode 100644 index 0000000..7f11ffd --- /dev/null +++ b/picasso/ml_frameworks/tensorflow/model.py @@ -0,0 +1,56 @@ +import os +import glob +from datetime import datetime + +import tensorflow as tf + +from picasso.ml_frameworks.model import Model + + +class TFModel(Model): + """Implements model loading functions for tensorflow""" + + def load(self, data_dir='./'): + """Load graph and weight data + + Args: + data_dir (:obj:`str`): location of tensorflow checkpoint + data. We'll need the .meta file to reconstruct + the graph and the data (checkpoint) files to + fill in the weights of the model. The default + behavior is take the latest files, by OS timestamp. 
+ + """ + + self.sess = tf.Session() + self.sess.as_default() + # find newest ckpt and meta files + try: + latest_ckpt_fn = max(glob.iglob(os.path.join(data_dir, '*.ckpt*')), + key=os.path.getctime) + self.latest_ckpt_time = str(datetime.fromtimestamp( + os.path.getmtime(latest_ckpt_fn) + )) + latest_ckpt = latest_ckpt_fn[:latest_ckpt_fn.rfind('.ckpt') + 5] + except ValueError: + raise FileNotFoundError('No checkpoint (.ckpt) files ' + 'available at {}'.format(data_dir)) + try: + latest_meta = max(glob.iglob(os.path.join(data_dir, '*.meta')), + key=os.path.getctime) + except ValueError: + raise FileNotFoundError('No graph (.meta) files ' + 'available at {}'.format(data_dir)) + + with self.sess.as_default() as sess: + self.saver = tf.train.import_meta_graph(latest_meta) + self.saver.restore(sess, latest_ckpt) + + self.tf_predict_var = \ + self.sess.graph.get_tensor_by_name(self.tf_predict_var) + self.tf_input_var = \ + self.sess.graph.get_tensor_by_name(self.tf_input_var) + + def _predict(self, input_array): + return self.sess.run(self.tf_predict_var, + {self.tf_input_var: input_array}) diff --git a/picasso/picasso.py b/picasso/picasso.py new file mode 100644 index 0000000..10d141a --- /dev/null +++ b/picasso/picasso.py @@ -0,0 +1,279 @@ +# -*- coding: utf-8 -*- +"""Flask server code for visualization + +This is the entry point for the application. """Flask server code for visualization

This is the entry point for the application. # Use a bogus secret key for debugging ease. # This pattern is used in other projects with Flask and
# tensorflow, but probably isn't the most stable or
# safest way. If the request is `GET`, render the landing page. Otherwise render the visualization. # Why is this necessary? The app will use these settings if none are specified.

You tried to navigate to a page that doesn't exist. Why don't you start over and try again?

+ +{% endblock %} diff --git a/picasso/templates/500.html b/picasso/templates/500.html new file mode 100644 index 0000000..bad3552 --- /dev/null +++ b/picasso/templates/500.html @@ -0,0 +1,12 @@ +{% extends "layout.html" %} +{% block body %} + + +

Something went wrong. Perhaps you tried to upload a non-image file? In that case, start over and try again.

+ +

If you're running locally, you can restart Flask in debug mode and look at the stack trace.

+ +

Finally, if you've found a bug, please file an issue!

+{% endblock %} diff --git a/picasso/templates/ClassProbabilities.html b/picasso/templates/ClassProbabilities.html new file mode 100644 index 0000000..1cdf0c8 --- /dev/null +++ b/picasso/templates/ClassProbabilities.html @@ -0,0 +1,23 @@ +{% extends "result.html" %} +{% block vis %} + + + {% for result in results %} + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + + + {% for predict_prob in result.predict_probs %} + + {% endfor %} + + {% endfor %} + +
{{ result.filename }} {{ predict_prob.name }}
+ + {{ predict_prob.prob }}
+{% endblock %} diff --git a/picasso/templates/PartialOcclusion.html b/picasso/templates/PartialOcclusion.html new file mode 100644 index 0000000..eb77b1a --- /dev/null +++ b/picasso/templates/PartialOcclusion.html @@ -0,0 +1,29 @@ +{% extends "result.html" %} +{% block vis %} + + + {% for result in results %} + + + + {% for prob in result.predict_probs %} + + {% endfor %} + + + + + {% for filename in result.result_filenames %} + + {% endfor %} + + {% endfor %} + +
{{ result.filename }}Occlusion Grid{{ prob.name }}: {{ prob.prob }}
+ + + + + +
+{% endblock %} diff --git a/picasso/templates/SaliencyMaps.html b/picasso/templates/SaliencyMaps.html new file mode 100644 index 0000000..1b3e644 --- /dev/null +++ b/picasso/templates/SaliencyMaps.html @@ -0,0 +1,25 @@ +{% extends "result.html" %} +{% block vis %} + + + {% for result in results %} + + + {% for prob in result.predict_probs %} + + {% endfor %} + + + + {% for filename in result.gradient_image_names %} + + {% endfor %} + + {% endfor %} + +
{{ result.filename }}{{ prob.name }}: {{ prob.prob }}
+ + + +
+{% endblock %} diff --git a/picasso/templates/layout.html b/picasso/templates/layout.html new file mode 100644 index 0000000..cd30161 --- /dev/null +++ b/picasso/templates/layout.html @@ -0,0 +1,31 @@ + + + {{ app_state.app_title }} + + + + + + + + +

{{ app_state.app_title }} + by + Merantix + +


Current backend: {{ app_state.backend }}

+ {% if app_state.latest_ckpt_name is defined %} +

Current checkpoint: {{ app_state.latest_ckpt_name }}

+ {% endif %} + {% if app_state.latest_ckpt_time is defined %} +

Last updated: {{ app_state.latest_ckpt_time }}

+ {% endif %} + Start over +
+ {% block body %}{% endblock %} +
diff --git a/picasso/templates/result.html b/picasso/templates/result.html new file mode 100644 index 0000000..a2d77f2 --- /dev/null +++ b/picasso/templates/result.html @@ -0,0 +1,19 @@ +{% extends "layout.html" %} +{% block body %} +

Selected Visualization: {{ current_vis }}

+ {% if settings is defined %} + {% for setting in settings %} +

+ {{ setting }}: {{ settings[setting] }} +

+ {% endfor %} + {% endif %} + {% block vis %}{% endblock vis %} + {% if duration is defined %} +

Computing the visualization took {{ duration }} seconds. + {% endif %} + {% if reference_link is defined %} + Visualization Reference + {% endif %} +{% endblock %} diff --git a/picasso/templates/select_files.html b/picasso/templates/select_files.html new file mode 100644 index 0000000..e6665fd --- /dev/null +++ b/picasso/templates/select_files.html @@ -0,0 +1,22 @@ +{% extends "layout.html" %} +{% block body %} +

Selected Visualization: {{ current_vis }}

+ {% if settings is defined %} + {% for setting in settings %} +

+ {{ setting }}: {{ settings[setting] }} +

+ {% endfor %} + {% endif %} +
+ + +

Image types only, please. Will be resized to CNN input + shape.

+ +
+{% endblock %} diff --git a/picasso/templates/select_visualization.html b/picasso/templates/select_visualization.html new file mode 100644 index 0000000..1d9bee4 --- /dev/null +++ b/picasso/templates/select_visualization.html @@ -0,0 +1,16 @@ +{% extends "layout.html" %} +{% block body %} +
+ + +
+ +
+{% endblock %} diff --git a/picasso/templates/settings.html b/picasso/templates/settings.html new file mode 100644 index 0000000..d995af5 --- /dev/null +++ b/picasso/templates/settings.html @@ -0,0 +1,17 @@ +{% extends "layout.html" %} +{% block body %} +

Selected Visualization: {{ current_vis }}

+ {% for setting in settings %} +
+ + +
+ {% endfor %} + +
+{% endblock %} diff --git a/picasso/visualizations/__init__.py b/picasso/visualizations/__init__.py new file mode 100644 index 0000000..0146dd3 --- /dev/null +++ b/picasso/visualizations/__init__.py @@ -0,0 +1,42 @@ +"""Visualizations live here + +All default and user-defined visualizations are submodules of this +module. All classes defined in this directory (except BaseVisualization) +will be imported. + +""" +import os +__all__ = [x.rpartition('.')[0] for x in os.listdir(__path__[0]) + if not x.startswith('__') and x.endswith('py')] + + +class BaseVisualization: + """Template for visualizations + + Attributes: + description (:obj:`str`): short description of the visualization + model (instance of :class:`.ml_frameworks.model.Model` or derived class): + backend to use + settings (:obj:`dict`): a settings dictionary. Settings defined + here will be rendered in html for the user to select. See
        derived classes for examples. Since there is an
            associated HTML template, the return type is arbitrary. Regions where classification probability drops
    significantly are likely very important to classification. pad_vertical, v - pad_horizontal) + for w in centers_vertical + for v in centers_horizontal] + ) + + images = [] + for corner in upper_left_corners: + arr = np.array(im) + self.add_occlusion_to_arr(arr, corner, + win_width, win_length, + occ_val=self.occlusion_value) + images.append( + Image.fromarray(arr) + ) + + return {'occluded_images': images, + 'centers_horizontal': centers_horizontal, + 'centers_vertical': centers_vertical, + 'win_width': win_width, + 'win_length': win_length, + 'pad_horizontal': pad_horizontal, + 'pad_vertical': pad_vertical} + + def make_example_image(self, im, + centers_horizontal, centers_vertical, + win_width, win_length, pad_vertical, + pad_horizontal, output_size=(244, 244)): + arr = np.array(im) + # add an example occlusion + self.add_occlusion_to_arr(arr, + (centers_vertical[1] - pad_vertical, + centers_horizontal[1] - pad_horizontal), + win_width, win_length, occ_val=100) + # add grid + g_pad_vertical = round(self.grid_percent * im.size[1]) or 1 + g_pad_horizontal = round(self.grid_percent * im.size[0]) or 1 + w_grid = 2 * g_pad_horizontal + l_grid = 2 * g_pad_vertical + upper_left_corners = np.array( + [(w - g_pad_vertical, v - g_pad_horizontal) + for w in centers_vertical + for v in centers_horizontal] + ) + for corner in upper_left_corners: + self.add_occlusion_to_arr(arr, corner, + w_grid, l_grid) + return Image.fromarray(arr) + + @staticmethod + def get_centers(width, length, + win_width, win_length, + pad_horizontal, pad_vertical, + num_windows): + centers_horizontal = np.linspace(pad_horizontal, + width - pad_horizontal, + num_windows).astype('int') + centers_vertical = np.linspace(pad_vertical, + length - pad_vertical, + num_windows).astype('int') + return centers_horizontal, centers_vertical + + @staticmethod + def add_occlusion_to_arr(arr, upper_left_corner, + width_horizontal, + width_vertical, + occ_val=0): + arr[upper_left_corner[0]: + upper_left_corner[0] 
+ width_vertical, + upper_left_corner[1]: + upper_left_corner[1] + width_horizontal] = occ_val diff --git a/picasso/visualizations/saliency_maps.py b/picasso/visualizations/saliency_maps.py new file mode 100644 index 0000000..060df75 --- /dev/null +++ b/picasso/visualizations/saliency_maps.py @@ -0,0 +1,113 @@ +import os +import time + +import numpy as np +import tensorflow as tf + +import matplotlib +matplotlib.use('Agg') +from matplotlib import pyplot + +from picasso.visualizations import BaseVisualization + + +class SaliencyMaps(BaseVisualization): + """Derivative of classification with respect to input pixels + + Saliency maps are a way of showing which inputs matter most to + classification. The derivative of a class probability with + respect to each input pixel are found with backpropagation. + High values for the derivative indicate pixels important to + classification (as changing them would change the classification). + + """ + description = ('See maximal derivates against class with respect ' + 'to input') + reference_link = 'https://arxiv.org/pdf/1312.6034' + + def __init__(self, model, logit_tensor_name=None): + super(SaliencyMaps, self).__init__(model) + if logit_tensor_name: + self.logit_tensor = self.model.sess.graph \ + .get_tensor_by_name(logit_tensor_name) + else: + self.logit_tensor = self.get_logit_tensor() + + def get_gradient_wrt_class(self, class_index): + gradient_name = 'bv_{class_index}_gradient' \ + .format(class_index=class_index) + try: + return self.model.sess.graph. 
\ + get_tensor_by_name('{}:0'.format(gradient_name)) + except KeyError: + class_logit = tf.slice(self.logit_tensor, + [0, class_index], + [1, 1]) + return tf.gradients(class_logit, + self.model.tf_input_var, + name=gradient_name)[0] + + def make_visualization(self, inputs, output_dir, settings=None): + + pre_processed_arrays = self.model.preprocess([example['data'] + for example in inputs]) + + # get predictions + predictions = self.model.sess.run(self.model.tf_predict_var, + feed_dict={self.model.tf_input_var: + pre_processed_arrays}) + decoded_predictions = self.model.decode_prob(predictions) + + results = [] + for i, inp in enumerate(inputs): + class_gradients = [] + output_images = [] + relevant_class_indices = [pred['index'] + for pred in decoded_predictions[i]] + gradients_wrt_class = [self.get_gradient_wrt_class(index) for index + in relevant_class_indices] + for gradient_wrt_class in gradients_wrt_class: + class_gradients.append([self.model.sess.run( + gradient_wrt_class, + feed_dict={self.model.tf_input_var: [arr]}) + for arr in pre_processed_arrays]) + output_fns = [] + output_arrays = np.array([gradient[i] for + gradient in class_gradients]) + # if images are color, take the maximum channel + if output_arrays.shape[-1] == 3: + output_arrays = output_arrays.max(-1) + + output_images = self.model.postprocess(np.abs(output_arrays)) + for j, image in enumerate(output_images): + output_fn = '{fn}-{j}-{ts}.png'.format(ts=str(time.time()), + j=j, + fn=inp['filename']) + + if i == 0 and j == 0: + im = pyplot.imshow(image, + cmap='Greys_r') + pyplot.axis('off') + im.axes.get_xaxis().set_visible(False) + im.axes.get_yaxis().set_visible(False) + else: + im.set_data(image) + + pyplot.savefig(os.path.join(output_dir, output_fn), + bbox_inches='tight', pad_inches=0) + output_fns.append(output_fn) + + results.append({'input_file_name': inp['filename'], + 'predict_probs': decoded_predictions[i], + 'gradient_image_names': output_fns}) + return results + + def 
get_logit_tensor(self): + # Assume that the logits are the tensor input to the last softmax + # operation in the computation graph + sm = [node for node in self.model.sess.graph_def.node + if node.name == + self.model.tf_predict_var.name.split(':')[0]][-1] + logit_op_name = sm.input[0] + return self.model.sess.graph. \ + get_tensor_by_name('{}:0'.format(logit_op_name)) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..5ee6477 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +testpaths = tests diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..5bb36ef --- /dev/null +++ b/setup.cfg @@ -0,0 +1,21 @@ +[bumpversion] +current_version = v0.1.1 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:picasso/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +test = pytest diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..5abb552 --- /dev/null +++ b/setup.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python author="Ryan Henderson",
    author_email='ryan@merantix.com',
    url='https://github.com/merantix/picasso', setup_requires=['pytest_runner'] +) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..777ac6f --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,31 @@ +from PIL import Image +import numpy as np +import pytest + +from picasso import app as _app + + +@pytest.fixture +def app(): + return _app + + +@pytest.fixture(scope='session') +def random_image_files(tmpdir_factory): + fn = tmpdir_factory.mktemp('images') + for i in range(4): + imarray = np.random.rand(10**i, 10**i, 3) * 255 + img = Image.fromarray(imarray.astype('uint8')).convert('RGBA') + img.save(str(fn.join('{}.png'.format(i))), 'PNG') + return fn + + +@pytest.fixture +def example_prob_array(): + return np.random.random((3, 10)) + + +@pytest.fixture +def base_model(): + from picasso.ml_frameworks.model import Model + return Model() diff --git a/tests/test_picasso.py b/tests/test_picasso.py new file mode 100644 index 0000000..bda8738 --- /dev/null +++ b/tests/test_picasso.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +test_picasso +---------------------------------- + +Tests for `picasso` module. 
+""" +import os + +from flask import url_for +import pytest +from werkzeug.test import EnvironBuilder + + +class TestWebApp: + from picasso.picasso import VISUALIZATON_CLASSES + + def test_landing_page_get(self, client): + assert client.get(url_for('landing')).status_code == 200 + + @pytest.mark.parametrize("vis", VISUALIZATON_CLASSES) + def test_landing_page_post(self, client, vis): + rv = client.post(url_for('landing'), + data=dict(choice=vis.__name__)) + assert rv.status_code == 200 + + @pytest.mark.parametrize("vis", VISUALIZATON_CLASSES) + def test_settings_page(self, client, vis): + if hasattr(vis, 'settings'): + with client.session_transaction() as sess: + sess['vis_name'] = vis.__name__ + rv = client.post(url_for('visualization_settings')) + assert rv.status_code == 200 + + @pytest.mark.parametrize("vis", VISUALIZATON_CLASSES) + def test_file_selection_get(self, client, vis): + with client.session_transaction() as sess: + sess['vis_name'] = vis.__name__ + rv = client.get(url_for('select_files')) + assert rv.status_code == 200 + + @pytest.mark.parametrize("vis", VISUALIZATON_CLASSES) + def test_file_selection_post(self, client, vis, random_image_files): + with client.session_transaction() as sess: + sess['vis_name'] = vis.__name__ + # load some settings into the session if the visualization calls + # for it + if hasattr(vis, 'settings'): + sess['settings'] = {key: vis.settings[key][0] + for key in vis.settings} + else: + sess['settings'] = {} + + # random images + builder = EnvironBuilder(path=url_for('select_files'), method='POST') + for path in random_image_files.listdir(): + path = str(path) + builder.files.add_file('file[]', path, + filename=os.path.split(str(path))[-1]) + rv = client.post(url_for('select_files'), data=builder.files) + assert rv.status_code == 200 + + +class TestBaseModel: + + def test_decode_prob(self, base_model, example_prob_array): + results = base_model.decode_prob(example_prob_array) + for i, result in enumerate(results): + max_val 
= max(example_prob_array[i]) + assert result[0]['prob'] == '{:.3f}'.format(max_val) + assert result[0]['index'] == example_prob_array[i].argmax() + assert result[0]['name'] == str(result[0]['index']) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..1b51d2d --- /dev/null +++ b/tox.ini @@ -0,0 +1,22 @@ +[tox] +envlist = py26, py27, py33, py34, py35, flake8 + +[testenv:flake8] +basepython=python +deps=flake8 +commands=flake8 picasso + +[testenv] +setenv = + PYTHONPATH = {toxinidir}:{toxinidir}/picasso +deps = + -r{toxinidir}/requirements_dev.txt +commands = + pip install -U pip + py.test --basetemp={envtmpdir} + + +; If you want to make tox run the tests with the same versions, create a +; requirements.txt with the pinned versions and uncomment the following lines: +; deps = +; -r{toxinidir}/requirements.txt diff 