From aeac71a93638bd9688e4a1b255be8b1adf17beae Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 12 Dec 2023 21:43:42 -0800 Subject: [PATCH] mlflow_tutorial --- .../MLflow_and_NeuralProphet.ipynb | 261 ++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 docs/source/how-to-guides/feature-guides/MLflow_and_NeuralProphet.ipynb diff --git a/docs/source/how-to-guides/feature-guides/MLflow_and_NeuralProphet.ipynb b/docs/source/how-to-guides/feature-guides/MLflow_and_NeuralProphet.ipynb new file mode 100644 index 000000000..bc037604f --- /dev/null +++ b/docs/source/how-to-guides/feature-guides/MLflow_and_NeuralProphet.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Integration of NeuralProphet into MLflow" + ], + "metadata": { + "collapsed": false + }, + "id": "62fc336ca6e8ff3e" + }, + { + "cell_type": "markdown", + "source": [ + "MLflow is an open-source platform designed for managing the end-to-end machine learning lifecycle. It provides functionalities for tracking experiments, packaging code into reproducible runs, and sharing and deploying models. \n", + "\n", + "NeuralProphet is compatible with MLflow and we can track our jobs on the MLflow platform." + ], + "metadata": { + "collapsed": false + }, + "id": "b69fe4a00cb5fd9d" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# for this tutorial, we need to install MLflow.\n", + "!pip install mlflow\n", + "\n", + "# Start a MLflow tracking-server on your local machine\n", + "# !mlflow server --host 127.0.0.1 --port 8080\n", + "\n", + "if \"google.colab\" in str(get_ipython()):\n", + " # uninstall preinstalled packages from Colab to avoid conflicts\n", + " !pip uninstall -y torch notebook notebook_shim tensorflow tensorflow-datasets prophet torchaudio torchdata torchtext torchvision\n", + " !pip install git+https://github.com/ourownstory/neural_prophet.git # may take a while\n", + " \n", + "# much faster using the following code, but may not have the latest upgrades/bugfixes\n", + "# pip install neuralprophet" + ], + "metadata": { + "collapsed": false + }, + "id": "68a3a88dce782e94" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from neuralprophet import NeuralProphet, set_log_level, save\n", + "import mlflow\n", + "import time\n", + "\n", + "set_log_level(\"ERROR\")\n", + "\n", + "data_location = \"https://raw.githubusercontent.com/ourownstory/neuralprophet-data/main/datasets/\"\n", + "df = pd.read_csv(data_location + \"air_passengers.csv\")" + ], + "metadata": { + "collapsed": false + }, + "id": "325dbd0616952d9f" + }, + { + "cell_type": "markdown", + "source": [ + "# Setting Up the MLflow Tracking Server" + ], + "metadata": { + "collapsed": false + }, + "id": "44e39e454140e92" + }, + { + "cell_type": "markdown", + "source": [ + "In this step, we're configuring MLflow to use a tracking server for logging and monitoring our machine learning experiments. The tracking server acts as a central repository for MLflow to store experiment data. This includes information like model parameters, metrics, and output files." + ], + "metadata": { + "collapsed": false + }, + "id": "9c999a9f7b26842a" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Set variable 'local' to True if you want to run this notebook locally\n", + "local = False" + ], + "metadata": { + "collapsed": false + }, + "id": "e7d835e606e97447" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Set our tracking server uri for logging\n", + "mlflow.set_tracking_uri(uri=\"http://127.0.0.1:8080\") if local else None" + ], + "metadata": { + "collapsed": false + }, + "id": "a80a4c8d1627e83f" + }, + { + "cell_type": "markdown", + "source": [ + "# End Previous Run" + ], + "metadata": { + "collapsed": false + }, + "id": "1cadfc2029a9f855" + }, + { + "cell_type": "markdown", + "source": [ + "If you have an active run before you start logging and monitoring this will throw an error. Therefore make sure you end all previous active runs. In a normal setting you should not have any active runs and you can ignore the following cell." + ], + "metadata": { + "collapsed": false + }, + "id": "ec9be45ccf0b27b8" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# End previous run if any\n", + "# mlflow.end_run()" + ], + "metadata": { + "collapsed": false + }, + "id": "24b23a7d5b203363" + }, + { + "cell_type": "markdown", + "source": [ + "# Starting an MLflow Experiment with NeuralProphet" + ], + "metadata": { + "collapsed": false + }, + "id": "e7699b83324ea1f6" + }, + { + "cell_type": "markdown", + "source": [ + "In the next step, we'll delve into initiating and managing an MLflow experiment for training a NeuralProphet model. The focus will be on setting up the experiment, defining model hyperparameters, and logging essential training metrics." + ], + "metadata": { + "collapsed": false + }, + "id": "63ebf38330c44d91" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Start a new MLflow run\n", + "if local:\n", + " with mlflow.start_run():\n", + "\n", + " # Create a new MLflow experiment\n", + " mlflow.set_experiment(\"NP-MLflow Quickstart_v1\")\n", + "\n", + " # Set a tag for the experiment\n", + " mlflow.set_tag(\"Description\", \"NeuralProphet MLflow Quickstart\")\n", + "\n", + " # Define NeuralProphet hyperparameters\n", + " params = {\n", + " \"n_lags\": 5,\n", + " \"n_forecasts\": 3,\n", + " }\n", + "\n", + " # Log Hyperparameters\n", + " mlflow.log_params(params)\n", + "\n", + " # Initialize NeuralProphet model and fit\n", + " start = time.time()\n", + " m = NeuralProphet(**params)\n", + " metrics_train = m.fit(df=df, freq=\"MS\")\n", + " end = time.time()\n", + "\n", + " # Log training duration\n", + " mlflow.log_metric('duration', end-start)\n", + "\n", + " # Log training metrics\n", + " mlflow.log_metric('MAE_train', value=list(metrics_train['MAE'])[-1])\n", + " mlflow.log_metric('RMSE_train', value=list(metrics_train['RMSE'])[-1])\n", + " mlflow.log_metric('Loss_train', value=list(metrics_train['Loss'])[-1])\n", + "\n", + " # save model\n", + " model_path = \"np-model.np\"\n", + " save(m, model_path)\n", + "\n", + " # Log the model in MLflow\n", + " mlflow.log_artifact(model_path, \"np-model\")" + ], + "metadata": { + "collapsed": false + }, + "id": "ce5870177ff63aad" + }, + { + "cell_type": "markdown", + "source": [ + "# View the NeuralProphet Run in the MLflow UI" + ], + "metadata": { + "collapsed": false + }, + "id": "cbf9fff72904c5e9" + }, + { + "cell_type": "markdown", + "source": [ + "In order to see the results of our run, we can navigate to the MLflow UI. Since we have already started the Tracking Server at http://localhost:8080, we can simply navigate to that URL in our browser and observe our experiments. \n", + "If we click on the respective experiments we can see a list of all runs associated with the experiment. Clicking on the run will take us to the run page, where the details of what we’ve logged will be shown." + ], + "metadata": { + "collapsed": false + }, + "id": "1c67c63d0f0aaff4" + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}