From a23406828caf27d935025e7c7afd9ec8c1bc066d Mon Sep 17 00:00:00 2001 From: RMCrean Date: Thu, 30 May 2024 17:36:59 +0200 Subject: [PATCH] Update tutorials for v 0.4.0 Also fix for mkdocs not showing new modules --- key_interactions_finder/utils.py | 3 +- mkdocs.yml | 4 +- .../Tutorial_KE07_Regression_ML_Stats.ipynb | 377 ++++++---- ...torial_PTP1B_Classification_ML_Stats.ipynb | 673 ++++++++++++------ 4 files changed, 699 insertions(+), 358 deletions(-) diff --git a/key_interactions_finder/utils.py b/key_interactions_finder/utils.py index 8a55c76..f2eced0 100644 --- a/key_interactions_finder/utils.py +++ b/key_interactions_finder/utils.py @@ -145,7 +145,8 @@ def download_prep_tutorial_dataset(drive_url: str, save_dir: str) -> None: "https://drive.google.com/file/d/1sYr_9stXLrOi_SDzHYLazSZUuzjL7lK-/view?usp=share_link", "https://drive.google.com/file/d/10DbX12ZNPKqRIAlqs55HC8I6zLXJnP_q/view?usp=share_link", "https://drive.google.com/file/d/1wPH4jOFOgIlpySLMN2ebk5PWzYgsrja2/view?usp=share_link", - "https://drive.google.com/file/d/1G4n-CXoqtt_qZtDfDXbByJtMTpbeIA4l/view?usp=share_link" + "https://drive.google.com/file/d/1G4n-CXoqtt_qZtDfDXbByJtMTpbeIA4l/view?usp=share_link", + "https://drive.google.com/file/d/1pqFUMMjt9gDYOxtkVpDmyXwKXiHp0wFi/view?usp=share_link" ] if drive_url not in accepted_links: diff --git a/mkdocs.yml b/mkdocs.yml index f91ee44..959add8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -44,9 +44,9 @@ nav: - Machine Learning: model_building.md - Network Analysis: network_analysis.md - Pymol Projections: pymol_projections.md - - ChimeraX Projections: chimerax_projections.py + - ChimeraX Projections: chimerax_projections.md - Utils: utils.md - - Project Structure Utils: project_structure_utils.py + - Project Structure Utils: project_structure_utils.md - PyContact Processing: pycontact_processing.md markdown_extensions: diff --git a/tutorials/Tutorial_KE07_Regression_ML_Stats.ipynb b/tutorials/Tutorial_KE07_Regression_ML_Stats.ipynb index 
8e3baa6..bd1b833 100644 --- a/tutorials/Tutorial_KE07_Regression_ML_Stats.ipynb +++ b/tutorials/Tutorial_KE07_Regression_ML_Stats.ipynb @@ -36,56 +36,62 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: KIF in c:\\users\\rory crean\\desktop\\github\\key-interactions-finder (0.1.1)\n", - "Requirement already satisfied: pandas in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.3.4)\n", - "Requirement already satisfied: numpy in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.21.4)\n", - "Requirement already satisfied: scikit-learn in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.0.1)\n", - "Requirement already satisfied: scipy in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.7.2)\n", - "Requirement already satisfied: xgboost in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.7.2)\n", - "Requirement already satisfied: catboost in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.1.1)\n", - "Requirement already satisfied: MDAnalysis in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (2.4.2)\n", - "Requirement already satisfied: gdown in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (4.6.0)\n", - "Requirement already satisfied: plotly in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from catboost->KIF) (5.4.0)\n", - "Requirement already satisfied: matplotlib in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from catboost->KIF) (3.5.0)\n", - "Requirement already satisfied: graphviz in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from catboost->KIF) (0.20.1)\n", - "Requirement already satisfied: six in 
c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from catboost->KIF) (1.16.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from pandas->KIF) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from pandas->KIF) (2021.3)\n", - "Requirement already satisfied: tqdm in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (4.64.1)\n", - "Requirement already satisfied: requests[socks] in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (2.28.1)\n", - "Requirement already satisfied: beautifulsoup4 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (4.11.1)\n", - "Requirement already satisfied: filelock in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (3.8.2)\n", - "Requirement already satisfied: threadpoolctl in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (3.0.0)\n", - "Requirement already satisfied: networkx>=2.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (2.8.8)\n", - "Requirement already satisfied: mmtf-python>=1.0.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.1.3)\n", - "Requirement already satisfied: GridDataFormats>=0.4.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.0.1)\n", - "Requirement already satisfied: gsd>=1.9.3 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (2.7.0)\n", - "Requirement already satisfied: biopython>=1.80 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.80)\n", - "Requirement already satisfied: fasteners in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (0.18)\n", 
- "Requirement already satisfied: joblib>=0.12 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.1.0)\n", - "Requirement already satisfied: packaging in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (21.3)\n", - "Requirement already satisfied: mrcfile in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from GridDataFormats>=0.4.0->MDAnalysis->KIF) (1.4.3)\n", - "Requirement already satisfied: pyparsing>=2.2.1 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (3.0.6)\n", - "Requirement already satisfied: cycler>=0.10 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (0.11.0)\n", - "Requirement already satisfied: fonttools>=4.22.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (4.28.1)\n", - "Requirement already satisfied: pillow>=6.2.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (8.4.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (1.3.2)\n", - "Requirement already satisfied: msgpack>=1.0.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from mmtf-python>=1.0.0->MDAnalysis->KIF) (1.0.4)\n", - "Requirement already satisfied: colorama in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from tqdm->gdown->KIF) (0.4.5)\n", - "Requirement already satisfied: soupsieve>1.2 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from beautifulsoup4->gdown->KIF) (2.3.2.post1)\n", - "Requirement already satisfied: tenacity>=6.2.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from plotly->catboost->KIF) (8.0.1)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in 
c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (2.1.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (1.26.13)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (2022.6.15)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (3.4)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (1.7.1)\n", + "Requirement already satisfied: KIF in /home/roryc/Desktop/git_projects/KIF (0.4.0)\n", + "Requirement already satisfied: pandas in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.2.2)\n", + "Requirement already satisfied: numpy in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.26.4)\n", + "Requirement already satisfied: scikit-learn in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.5.0)\n", + "Requirement already satisfied: scipy in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.13.1)\n", + "Requirement already satisfied: xgboost in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.0.3)\n", + "Requirement already satisfied: catboost in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.2.5)\n", + "Requirement already satisfied: MDAnalysis in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.7.0)\n", + "Requirement already satisfied: MDAnalysisTests in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.7.0)\n", + "Requirement already 
satisfied: gdown in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (5.2.0)\n", + "Requirement already satisfied: graphviz in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (0.20.3)\n", + "Requirement already satisfied: matplotlib in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (3.9.0)\n", + "Requirement already satisfied: plotly in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (5.22.0)\n", + "Requirement already satisfied: six in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (1.16.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pandas->KIF) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pandas->KIF) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pandas->KIF) (2024.1)\n", + "Requirement already satisfied: beautifulsoup4 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (4.12.3)\n", + "Requirement already satisfied: filelock in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (3.14.0)\n", + "Requirement already satisfied: requests[socks] in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (2.32.2)\n", + "Requirement already satisfied: tqdm in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (4.66.4)\n", + "Requirement already satisfied: GridDataFormats>=0.4.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (1.0.2)\n", + "Requirement already satisfied: mmtf-python>=1.0.0 in 
/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (1.1.3)\n", + "Requirement already satisfied: joblib>=0.12 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (3.5.0)\n", + "Requirement already satisfied: packaging in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (24.0)\n", + "Requirement already satisfied: fasteners in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (0.19)\n", + "Requirement already satisfied: mda-xdrlib in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (0.2.0)\n", + "Requirement already satisfied: pytest>=3.3.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysisTests->KIF) (8.2.1)\n", + "Requirement already satisfied: hypothesis in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysisTests->KIF) (6.102.6)\n", + "Requirement already satisfied: mrcfile in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from GridDataFormats>=0.4.0->MDAnalysis->KIF) (1.5.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (4.52.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from 
matplotlib->catboost->KIF) (1.4.5)\n", + "Requirement already satisfied: pillow>=8 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (10.3.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (3.1.2)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from mmtf-python>=1.0.0->MDAnalysis->KIF) (1.0.8)\n", + "Requirement already satisfied: iniconfig in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pytest>=3.3.0->MDAnalysisTests->KIF) (2.0.0)\n", + "Requirement already satisfied: pluggy<2.0,>=1.5 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pytest>=3.3.0->MDAnalysisTests->KIF) (1.5.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from beautifulsoup4->gdown->KIF) (2.5)\n", + "Requirement already satisfied: attrs>=22.2.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from hypothesis->MDAnalysisTests->KIF) (23.2.0)\n", + "Requirement already satisfied: sortedcontainers<3.0.0,>=2.1.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from hypothesis->MDAnalysisTests->KIF) (2.4.0)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from plotly->catboost->KIF) (8.3.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in 
/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (2.2.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (2024.2.2)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (1.7.1)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -96,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -106,7 +112,8 @@ "from key_interactions_finder import stat_modelling\n", "from key_interactions_finder import model_building\n", "from key_interactions_finder import post_proccessing\n", - "from key_interactions_finder import pymol_projections" + "from key_interactions_finder import pymol_projections\n", + "from key_interactions_finder import chimerax_projections" ] }, { @@ -124,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -132,9 +139,9 @@ "output_type": "stream", "text": [ "Downloading...\n", - "From: https://drive.google.com/uc?id=13SjTIbSjF4ai_-Fn1vJDvW6-nsPmCdbw\n", - "To: c:\\Users\\Rory Crean\\Desktop\\Github\\key-interactions-finder\\tutorials\\tutorial_datasets\\tutorial_dataset.zip\n", - "100%|██████████| 22.9M/22.9M [00:00<00:00, 33.2MB/s]\n" + "From: https://drive.google.com/uc?id=1pqFUMMjt9gDYOxtkVpDmyXwKXiHp0wFi\n", + "To: /home/roryc/Desktop/git_projects/KIF/tutorials/tutorial_datasets/tutorial_dataset.zip\n", + "100%|██████████| 22.4M/22.4M [00:02<00:00, 8.48MB/s]\n" ] }, { @@ -148,7 +155,7 @@ "source": [ "from key_interactions_finder.utils import download_prep_tutorial_dataset\n", "\n", - "drive_url = r\"https://drive.google.com/file/d/13SjTIbSjF4ai_-Fn1vJDvW6-nsPmCdbw/view?usp=share_link\"\n", + "drive_url 
= r\"https://drive.google.com/file/d/1pqFUMMjt9gDYOxtkVpDmyXwKXiHp0wFi/view?usp=share_link\"\n", "save_dir = \"tutorial_datasets/\"\n", "\n", "download_prep_tutorial_dataset(drive_url=drive_url, save_dir=save_dir)" @@ -156,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -169,6 +176,9 @@ "# Path to the variable we will use to filter frames with (optional addition for this system). \n", "w50_chi1_file = in_dir + r\"R1_5d2w_1in10_Trp50_Chi1.dat\"\n", "\n", + "# The pdb file will later be used to help make the ChimeraX visualisations of the results. \n", + "pdb_file = in_dir + r\"R1_5d2w.pdb\"\n", + "\n", "# output folders\n", "stats_out_dir = save_dir + r\"KE07_Tutorial/KE07_stat_analysis\" \n", "ml_out_dir = save_dir + r\"KE07_Tutorial/KE07_ml_analysis\"" @@ -188,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -352,7 +362,7 @@ "[3 rows x 839 columns]" ] }, - "execution_count": 5, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -379,7 +389,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -388,7 +398,7 @@ "(10001, 839)" ] }, - "execution_count": 6, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -408,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -567,7 +577,7 @@ "[3 rows x 840 columns]" ] }, - "execution_count": 7, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -594,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -761,7 +771,7 @@ "[3 rows x 841 columns]" ] }, - "execution_count": 8, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -804,7 +814,7 @@ }, { "cell_type": "code", - 
"execution_count": 9, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -841,7 +851,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -850,7 +860,7 @@ "dict_keys(['input_df', 'is_classification', 'target_file', 'header_present', 'df_processed', 'df_filtered'])" ] }, - "execution_count": 10, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -879,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1051,7 +1061,7 @@ "[3 rows x 684 columns]" ] }, - "execution_count": 11, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1104,7 +1114,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -1125,7 +1135,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -1133,7 +1143,7 @@ "output_type": "stream", "text": [ "Mutual information scores calculated.\n", - "tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\Mutual_Information_Per_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/Mutual_Information_Per_Feature_Scores.csv written to disk.\n", "You can also access these results via the class attribute: 'mutual_infos'.\n" ] } @@ -1144,7 +1154,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1152,7 +1162,7 @@ "output_type": "stream", "text": [ "Linear correlations calculated.\n", - "tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\Linear_Correlations_Per_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/Linear_Correlations_Per_Feature_Scores.csv written to disk.\n", "You can also access these results via the class attribute: 'linear_correlations'.\n" ] } @@ -1163,7 +1173,7 @@ }, { 
"cell_type": "code", - "execution_count": 15, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -1184,7 +1194,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1197,15 +1207,15 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\Mutual_Information_Scores_Per_Residue.csv written to disk.\n", - "tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\Linear_Correlation_Scores_Per_Residue.csv written to disk.\n" + "tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/Mutual_Information_Scores_Per_Residue.csv written to disk.\n", + "tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/Linear_Correlation_Scores_Per_Residue.csv written to disk.\n" ] } ], @@ -1231,29 +1241,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Part 1.3. Project the Results onto Protein Structures with the pymol_projections.py module. \n", - " \n", - "Naturally, we may want to visualise some of the results we have generated above onto a protein structure. We can take advantage of\n", - "the functions provided in the pymol_projections.py module to do this. \n", + "### Part 1.3. Project the Results onto 3D Protein Structures. \n", + "\n", + "Naturally, we may want to visualise some of the results we have generated above onto a protein structure. \n", "\n", - "As the name suggests this will output [PyMOL](https://pymol.org/) compatible python scripts which can be run to represent the results\n", - "at the: \n", + "We can take advantage of the functions provided in either of the following files: \n", "\n", - "1. Per feature level. (Cylinders are drawn between each feature, with the cylinder radii marking how strong the relative difference is. \n", + "1. 
pymol_projections.py - will output [PyMOL](https://pymol.org/) compatible python scripts\n", + "2. chimerax_projections.py - will output [ChimeraX](https://www.cgl.ucsf.edu/chimerax/) compatible scripts\n", + "\n", + "Both modules can be used to represent the results at the:\n", + "1. Per feature/interaction level. (Cylinders are drawn between each feature, with the cylinder radii marking how strong the relative difference is.) \n", "2. Per residue level. The Carbon alpha of each residue will be depicted as a sphere, with the sphere radii depicting how strong the the relative difference is. " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3.1 PyMOL Projections" + ] + }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The file: tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\linear_correlation_Pymol_Per_Feature_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\mutual_information_Pymol_Per_Feature_Scores.py was written to disk.\n" + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/linear_correlation_Pymol_Per_Feature_Scores.py was written to disk.\n", + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/mutual_information_Pymol_Per_Feature_Scores.py was written to disk.\n" ] } ], @@ -1277,15 +1296,15 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The file: tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\linear_correlation_Pymol_Per_Res_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\KE07_Tutorial\\KE07_stat_analysis\\mutual_information_Pymol_Per_Res_Scores.py was written to disk.\n" + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/linear_correlation_Pymol_Per_Res_Scores.py was written to 
disk.\n", + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/mutual_information_Pymol_Per_Res_Scores.py was written to disk.\n" ] } ], @@ -1305,12 +1324,83 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3.2 ChimeraX Projections" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/linear_correlation_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n", + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/mutual_information_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + } + ], + "source": [ + "# Write ChimeraX compatible scripts for the per feature results.\n", + "# Simply swap between the two statistical methods as shown below. \n", + "chimerax_projections.project_chimerax_top_features(\n", + " per_feature_scores=stat_model.linear_correlations,\n", + " model_name=\"linear_correlation\",\n", + " pdb_file=pdb_file,\n", + " numb_features=125, # can be any integer value or \"all\" if you would like all features returned.\n", + " out_dir=stats_out_dir\n", + ")\n", + "\n", + "chimerax_projections.project_chimerax_top_features(\n", + " per_feature_scores=stat_model.mutual_infos,\n", + " model_name=\"mutual_information\",\n", + " pdb_file=pdb_file,\n", + " numb_features=125, # can be any integer value or \"all\" if you would like all features returned.\n", + " out_dir=stats_out_dir\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/linear_correlation_ChimeraX_Per_Res_Scores.cxc was written to disk.\n", + "The file: tutorial_datasets/KE07_Tutorial/KE07_stat_analysis/mutual_information_ChimeraX_Per_Res_Scores.cxc was written to disk.\n" + ] + } + ], + 
"source": [ + "# Write ChimeraX compatible scripts for the per residue results.\n", + "# Simply swap between the two statistical methods as shown below. \n", + "chimerax_projections.project_chimerax_per_res_scores(\n", + " per_res_scores=lin_correl_per_res_scores,\n", + " model_name=\"linear_correlation\",\n", + " out_dir=stats_out_dir\n", + ")\n", + "\n", + "chimerax_projections.project_chimerax_per_res_scores(\n", + " per_res_scores=mi_per_res_scores,\n", + " model_name=\"mutual_information\",\n", + " out_dir=stats_out_dir\n", + ")" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Now we are complete with the stats module. Here is an example of the kind of figures you can make with the pymol projections generated:\n", + "Now we are complete with the stats module. Here is an example of the kind of figures you can make:\n", "\n", "\n", "
" @@ -1343,7 +1433,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -1395,14 +1485,14 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Model saved to disk at: temporary_files\\CatBoost_Model.pickle\n", + "Model saved to disk at: temporary_files/CatBoost_Model.pickle\n", "Model building complete, returning final results with train/test datasets to you.\n" ] }, @@ -1439,9 +1529,9 @@ " 0\n", " CatBoost\n", " {'iterations': 100}\n", - " 0.869809\n", - " 0.007601\n", - " 2.62\n", + " 0.864941\n", + " 0.007081\n", + " 0.8\n", " \n", " \n", "\n", @@ -1449,13 +1539,13 @@ ], "text/plain": [ " model best_params best_score best_standard_deviation \\\n", - "0 CatBoost {'iterations': 100} 0.869809 0.007601 \n", + "0 CatBoost {'iterations': 100} 0.864941 0.007081 \n", "\n", " Time taken to build model (minutes) \n", - "0 2.62 " + "0 0.8 " ] }, - "execution_count": 21, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1475,7 +1565,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -1512,12 +1602,12 @@ " \n", " 0\n", " CatBoost\n", - " 0.8758\n", - " 17.6354\n", - " 700.6846\n", - " 26.4704\n", - " 0.0401\n", - " 0.8756\n", + " 0.8818\n", + " 18.0938\n", + " 707.5389\n", + " 26.5996\n", + " 0.0413\n", + " 0.8816\n", " \n", " \n", "\n", @@ -1525,13 +1615,13 @@ ], "text/plain": [ " Model Explained Variance Mean Absolute Error MSE RMSE \\\n", - "0 CatBoost 0.8758 17.6354 700.6846 26.4704 \n", + "0 CatBoost 0.8818 18.0938 707.5389 26.5996 \n", "\n", " Mean Squared Log Error r squared \n", - "0 0.0401 0.8756 " + "0 0.0413 0.8816 " ] }, - "execution_count": 22, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1563,7 +1653,7 @@ }, { "cell_type": "code", - "execution_count": 23, + 
"execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -1582,14 +1672,14 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\KE07_Tutorial\\KE07_ml_analysis\\CatBoost_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/KE07_Tutorial/KE07_ml_analysis/CatBoost_Feature_Scores.csv written to disk.\n", "All per feature scores have now been saved to disk.\n" ] } @@ -1601,14 +1691,14 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\KE07_Tutorial\\KE07_ml_analysis\\CatBoost_Per_Residue_Scores.csv written to disk.\n", + "tutorial_datasets/KE07_Tutorial/KE07_ml_analysis/CatBoost_Per_Residue_Scores.csv written to disk.\n", "All per residue scores have now been saved to disk.\n" ] } @@ -1620,7 +1710,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -1646,7 +1736,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -1665,27 +1755,34 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Part 2.3. Project the Results onto Protein Structures with the pymol_projections.py module. \n", - " \n", + "### Part 2.3. Project the Results onto Protein Structures \n", + "\n", "This section is essentially identical to 1.3, only that now we will output the ml results instead of the stats results" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.3.1 - For projecting the results with PyMOL \n", + "Here you do not need to specify which model you would like to output the results for; all will be output simultaneously." 
+ ] + }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The file: tutorial_datasets\\KE07_Tutorial\\KE07_ml_analysis\\CatBoost_Pymol_Per_Res_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\KE07_Tutorial\\KE07_ml_analysis\\CatBoost_Pymol_Per_Feature_Scores.py was written to disk.\n" + "The file: tutorial_datasets/KE07_Tutorial/KE07_ml_analysis/CatBoost_Pymol_Per_Res_Scores.py was written to disk.\n", + "The file: tutorial_datasets/KE07_Tutorial/KE07_ml_analysis/CatBoost_Pymol_Per_Feature_Scores.py was written to disk.\n" ] } ], "source": [ - "# Here you do not need to specify what model you would like to output results for, all will be outputted simultaneously.\n", "pymol_projections.project_multiple_per_res_scores(\n", " all_per_res_scores=ml_post_proc.all_per_residue_scores,\n", " out_dir=ml_out_dir\n", @@ -1698,6 +1795,42 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.3.2 - For projecting the results with ChimeraX\n", + "Here you do not need to specify which model you would like to output the results for; results for all models will be output simultaneously." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/KE07_Tutorial/KE07_ml_analysis/CatBoost_ChimeraX_Per_Res_Scores.cxc was written to disk.\n", + "The file: tutorial_datasets/KE07_Tutorial/KE07_ml_analysis/CatBoost_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + } + ], + "source": [ + "chimerax_projections.project_multiple_per_res_scores(\n", + " all_per_res_scores=ml_post_proc.all_per_residue_scores,\n", + " out_dir=ml_out_dir\n", + ")\n", + "\n", + "chimerax_projections.project_multiple_per_feature_scores(\n", + " all_per_feature_scores=ml_post_proc.all_per_feature_scores,\n", + " pdb_file=pdb_file,\n", + " numb_features=\"all\",\n", + " out_dir=ml_out_dir\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1722,7 +1855,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.11.9" }, "orig_nbformat": 4, "vscode": { diff --git a/tutorials/Tutorial_PTP1B_Classification_ML_Stats.ipynb b/tutorials/Tutorial_PTP1B_Classification_ML_Stats.ipynb index 22b17d8..019f963 100644 --- a/tutorials/Tutorial_PTP1B_Classification_ML_Stats.ipynb +++ b/tutorials/Tutorial_PTP1B_Classification_ML_Stats.ipynb @@ -43,51 +43,62 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: KIF in c:\\users\\rory crean\\desktop\\github\\key-interactions-finder (0.1.1)\n", - "Requirement already satisfied: plotly in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (5.4.0)\n", - "Requirement already satisfied: pandas in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.3.4)\n", - "Requirement already satisfied: numpy in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.21.4)\n", - "Requirement already satisfied: scikit-learn in 
c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.0.1)\n", - "Requirement already satisfied: scipy in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.7.2)\n", - "Requirement already satisfied: xgboost in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.7.2)\n", - "Requirement already satisfied: catboost in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (1.1.1)\n", - "Requirement already satisfied: MDAnalysis in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (2.4.2)\n", - "Requirement already satisfied: gdown in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from KIF) (4.6.0)\n", - "Requirement already satisfied: tenacity>=6.2.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from plotly) (8.0.1)\n", - "Requirement already satisfied: six in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from plotly) (1.16.0)\n", - "Requirement already satisfied: graphviz in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from catboost->KIF) (0.20.1)\n", - "Requirement already satisfied: matplotlib in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from catboost->KIF) (3.5.0)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from pandas->KIF) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from pandas->KIF) (2021.3)\n", - "Requirement already satisfied: requests[socks] in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (2.28.1)\n", - "Requirement already satisfied: beautifulsoup4 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (4.11.1)\n", - "Requirement already satisfied: filelock in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from 
gdown->KIF) (3.8.2)\n", - "Requirement already satisfied: tqdm in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from gdown->KIF) (4.64.1)\n", - "Requirement already satisfied: mmtf-python>=1.0.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.1.3)\n", - "Requirement already satisfied: joblib>=0.12 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.1.0)\n", - "Requirement already satisfied: fasteners in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (0.18)\n", - "Requirement already satisfied: gsd>=1.9.3 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (2.7.0)\n", - "Requirement already satisfied: biopython>=1.80 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.80)\n", - "Requirement already satisfied: GridDataFormats>=0.4.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (1.0.1)\n", - "Requirement already satisfied: threadpoolctl in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (3.0.0)\n", - "Requirement already satisfied: packaging in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (21.3)\n", - "Requirement already satisfied: networkx>=2.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from MDAnalysis->KIF) (2.8.8)\n", - "Requirement already satisfied: mrcfile in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from GridDataFormats>=0.4.0->MDAnalysis->KIF) (1.4.3)\n", - "Requirement already satisfied: fonttools>=4.22.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (4.28.1)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (1.3.2)\n", - "Requirement 
already satisfied: pyparsing>=2.2.1 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (3.0.6)\n", - "Requirement already satisfied: pillow>=6.2.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (8.4.0)\n", - "Requirement already satisfied: cycler>=0.10 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from matplotlib->catboost->KIF) (0.11.0)\n", - "Requirement already satisfied: msgpack>=1.0.0 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from mmtf-python>=1.0.0->MDAnalysis->KIF) (1.0.4)\n", - "Requirement already satisfied: colorama in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from tqdm->gdown->KIF) (0.4.5)\n", - "Requirement already satisfied: soupsieve>1.2 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from beautifulsoup4->gdown->KIF) (2.3.2.post1)\n", - "Requirement already satisfied: idna<4,>=2.5 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (3.4)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (2.1.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (2022.6.15)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (1.26.13)\n", - "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (from requests[socks]->gdown->KIF) (1.7.1)\n", + "Requirement already satisfied: KIF in /home/roryc/Desktop/git_projects/KIF (0.4.0)\n", + "Requirement already satisfied: plotly in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (5.22.0)\n", + 
"Requirement already satisfied: pandas in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.2.2)\n", + "Requirement already satisfied: numpy in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.26.4)\n", + "Requirement already satisfied: scikit-learn in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.5.0)\n", + "Requirement already satisfied: scipy in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.13.1)\n", + "Requirement already satisfied: xgboost in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.0.3)\n", + "Requirement already satisfied: catboost in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (1.2.5)\n", + "Requirement already satisfied: MDAnalysis in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.7.0)\n", + "Requirement already satisfied: MDAnalysisTests in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (2.7.0)\n", + "Requirement already satisfied: gdown in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from KIF) (5.2.0)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from plotly) (8.3.0)\n", + "Requirement already satisfied: packaging in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from plotly) (24.0)\n", + "Requirement already satisfied: graphviz in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (0.20.3)\n", + "Requirement already satisfied: matplotlib in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (3.9.0)\n", + "Requirement already satisfied: six in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from catboost->KIF) (1.16.0)\n", + "Requirement already 
satisfied: python-dateutil>=2.8.2 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pandas->KIF) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pandas->KIF) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pandas->KIF) (2024.1)\n", + "Requirement already satisfied: beautifulsoup4 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (4.12.3)\n", + "Requirement already satisfied: filelock in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (3.14.0)\n", + "Requirement already satisfied: requests[socks] in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (2.32.2)\n", + "Requirement already satisfied: tqdm in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from gdown->KIF) (4.66.4)\n", + "Requirement already satisfied: GridDataFormats>=0.4.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (1.0.2)\n", + "Requirement already satisfied: mmtf-python>=1.0.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (1.1.3)\n", + "Requirement already satisfied: joblib>=0.12 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (3.5.0)\n", + "Requirement already satisfied: fasteners in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (0.19)\n", + "Requirement already satisfied: mda-xdrlib in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysis->KIF) (0.2.0)\n", + "Requirement already 
satisfied: pytest>=3.3.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysisTests->KIF) (8.2.1)\n", + "Requirement already satisfied: hypothesis in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from MDAnalysisTests->KIF) (6.102.6)\n", + "Requirement already satisfied: mrcfile in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from GridDataFormats>=0.4.0->MDAnalysis->KIF) (1.5.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (4.52.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (1.4.5)\n", + "Requirement already satisfied: pillow>=8 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (10.3.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from matplotlib->catboost->KIF) (3.1.2)\n", + "Requirement already satisfied: msgpack>=1.0.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from mmtf-python>=1.0.0->MDAnalysis->KIF) (1.0.8)\n", + "Requirement already satisfied: iniconfig in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pytest>=3.3.0->MDAnalysisTests->KIF) (2.0.0)\n", + "Requirement already satisfied: pluggy<2.0,>=1.5 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from pytest>=3.3.0->MDAnalysisTests->KIF) (1.5.0)\n", + 
"Requirement already satisfied: soupsieve>1.2 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from beautifulsoup4->gdown->KIF) (2.5)\n", + "Requirement already satisfied: attrs>=22.2.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from hypothesis->MDAnalysisTests->KIF) (23.2.0)\n", + "Requirement already satisfied: sortedcontainers<3.0.0,>=2.1.0 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from hypothesis->MDAnalysisTests->KIF) (2.4.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (2.2.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (2024.2.2)\n", + "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages (from requests[socks]->gdown->KIF) (1.7.1)\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: kaleido in c:\\anaconda3\\envs\\ml_conf_features_3.8\\lib\\site-packages (0.2.1)\n", + "Collecting kaleido\n", + " Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)\n", + "Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.9/79.9 MB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", + "\u001b[?25hInstalling 
collected packages: kaleido\n", + "Successfully installed kaleido-0.2.1\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -111,7 +122,8 @@ "from key_interactions_finder import model_building\n", "from key_interactions_finder import stat_modelling\n", "from key_interactions_finder import post_proccessing\n", - "from key_interactions_finder import pymol_projections" + "from key_interactions_finder import pymol_projections\n", + "from key_interactions_finder import chimerax_projections" ] }, { @@ -137,9 +149,10 @@ "output_type": "stream", "text": [ "Downloading...\n", - "From: https://drive.google.com/uc?id=1hJbwCCuTTgI4xglwu1vXyzo-yaZJbmUY\n", - "To: c:\\Users\\Rory Crean\\Desktop\\Github\\key-interactions-finder\\tutorials\\tutorial_datasets\\tutorial_dataset.zip\n", - "100%|██████████| 27.5M/27.5M [00:01<00:00, 20.0MB/s]\n" + "From (original): https://drive.google.com/uc?id=1hJbwCCuTTgI4xglwu1vXyzo-yaZJbmUY\n", + "From (redirected): https://drive.google.com/uc?id=1hJbwCCuTTgI4xglwu1vXyzo-yaZJbmUY&confirm=t&uuid=20d4726f-b7c4-4adb-b9f9-954217c2c2cd\n", + "To: /home/roryc/Desktop/git_projects/KIF/tutorials/tutorial_datasets/tutorial_dataset.zip\n", + "100%|██████████| 27.5M/27.5M [00:04<00:00, 6.88MB/s]\n" ] }, { @@ -1018,7 +1031,7 @@ "output_type": "stream", "text": [ "Jensen-Shannon (JS) distances calculated.\n", - "tutorial_datasets\\PTP1B_stat_analysis\\Jensen_Shannon_Per_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/Jensen_Shannon_Per_Feature_Scores.csv written to disk.\n", "You can also access these results via the class attribute: 'js_distances'.\n" ] } @@ -1038,7 +1051,7 @@ "output_type": "stream", "text": [ "Mutual information scores calculated.\n", - "tutorial_datasets\\PTP1B_stat_analysis\\Mutual_Information_Per_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/Mutual_Information_Per_Feature_Scores.csv written to disk.\n", "You can 
also access these results via the class attribute: 'mutual_infos'.\n" ] } @@ -1114,8 +1127,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\PTP1B_stat_analysis\\Jensen_Shannon_Distance_Scores_Per_Residue.csv written to disk.\n", - "tutorial_datasets\\PTP1B_stat_analysis\\Mutual_Information_Scores_Per_Residue.csv written to disk.\n" + "tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/Jensen_Shannon_Distance_Scores_Per_Residue.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/Mutual_Information_Scores_Per_Residue.csv written to disk.\n" ] } ], @@ -1136,7 +1149,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\PTP1B_stat_analysis\\Feature_Direction_Estimates.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/Feature_Direction_Estimates.csv written to disk.\n", "You can access these predictions through the 'feature_directions' class attribute.\n" ] }, @@ -1144,10 +1157,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "c:\\users\\rory crean\\desktop\\github\\key-interactions-finder\\key_interactions_finder\\post_proccessing.py:748: UserWarning: Warning, this method is very simplistic and just calculates the average contact score/strength for all features for both classes to determine the direction each feature appears to favour. You should therefore interpret these results with care...\n", - " warnings.warn(warning_message)\n", - "c:\\users\\rory crean\\desktop\\github\\key-interactions-finder\\key_interactions_finder\\post_proccessing.py:758: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. 
Select only valid columns before calling the reduction.\n", - " avg_contact_scores[class_name] = class_observations.mean()\n" + "/home/roryc/Desktop/git_projects/KIF/key_interactions_finder/post_proccessing.py:774: UserWarning: Warning, this method is very simplistic and just calculates the average contact score/strength for all features for both classes to determine the direction each feature appears to favour. You should therefore interpret these results with care...\n", + " warnings.warn(warning_message)\n" ] } ], @@ -1202,7 +1213,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1275,7 +1286,7 @@ "4 0.08 0.036506 0.012218" ] }, - "execution_count": 23, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1293,13 +1304,13 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ - "00.20.40.60.8105101520variableClosed WPD-LoopOpen WPD-LoopComparison of the KDEs obtained for the Closed and Open WPD-loopNormalised Interaction StrengthDensity" + "00.20.40.60.8105101520variableClosed WPD-LoopOpen WPD-LoopComparison of the KDEs obtained for the Closed and Open WPD-loopNormalised Interaction StrengthDensity" ] }, "metadata": {}, @@ -1332,14 +1343,14 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "c:\\anaconda3\\envs\\ML_conf_features_3.8\\lib\\site-packages\\MDAnalysis\\topology\\PDBParser.py:328: UserWarning:\n", + "/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.py:331: UserWarning:\n", "\n", "Element information is missing, elements attribute will not be populated. 
If needed these can be guessed using MDAnalysis.topology.guessers.\n", "\n" @@ -1349,7 +1360,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets//PTP1B_stat_analysisDistance_toCalpha_D181_Closed.txt written to disk.\n" + "tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysisDistance_toCalpha_D181_Closed.txt written to disk.\n" ] } ], @@ -1379,7 +1390,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1446,7 +1457,7 @@ "178 0.58728 3.68" ] }, - "execution_count": 26, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1463,13 +1474,13 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ - "05010015020025030005101520253035Bubble Chart of the Per Residue Score for each ResidueResidue NumberDistance to D181 (Å)" + "05010015020025030005101520253035Bubble Chart of the Per Residue Score for each ResidueResidue NumberDistance to D181 (Å)" ] }, "metadata": {}, @@ -1496,13 +1507,13 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ - 
"22018017926517819011111411918118229618927221519226828718718422429115119611022726219125329418618325615019415242197109260298115149195193176442711856022327926110767269173632493945482752592122972264396295290273231382672842661556919541882551181002146821755941222452782779218108125440137153346516810621627199217945117728517429320027413620270161233221229123228170264158121159213289211352368920728823771208246937658620318241270172148138139981574998023141248785628616721025820132876853112146134521171565729224266372192616016516162104204280332817924316925014084751337723810236831435515312514231112428319849263131927251122252479088240648520917112212630292322391477216616350621542197101144582351351459516474244171410321325284647617391105108113116120124127128129130132205206208222230234252257276282051015202530354000.20.40.60.81Distance to D181 (Å)Per Residue Score" + "22018017926517819011111411918118229618927221519226828718718422429115119611022726219125329418618325615019415242197109260298115149195193176442711856022327926110767269173632493945482752592122972264396295290273231382672842661556919541882551181002146821755941222452782779218108125440137153346516810621627199217945117728517429320027413620270161233221229123228170264158121159213289211352368920728823771208246937658620318241270172148138139981574998023141248785628616721025820132876853112146134521171565729224266372192616016516162104204280332817924316925014084751337723810236831435515312514231112428319849263131927251122252479088240648520917112212630292322391477216616350621542197101144582351351459516474244171410321325284647617391105108113116120124127128129130132205206208222230234252257276282051015202530354000.20.40.60.81Distance to D181 (Å)Per Residue Score" ] }, "metadata": {}, @@ -1539,29 +1550,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Part 1.3. Project the Results onto Protein Structures with the pymol_projections.py module. 
\n", - " \n", - "Naturally, we may want to visualise some of the results we have generated above onto a protein structure. We can take advantage of\n", - "the functions provided in the pymol_projections.py module to do this. \n", + "### Part 1.3. Project the Results onto 3D Protein Structures. \n", + "\n", + "Naturally, we may want to visualise some of the results we have generated above onto a protein structure. \n", "\n", - "As the name suggests this will output [PyMOL](https://pymol.org/) compatible python scripts which can be run to represent the results\n", - "at the: \n", + "We can take advantage of the functions provided in either of the following files: \n", "\n", - "1. Per feature level. (Cylinders are drawn between each feature, with the cylinder radii marking how strong the relative difference is. \n", + "1. pymol_projections.py - will output [PyMOL](https://pymol.org/) compatible python scripts\n", + "2. chimerax_projections.py - will output [ChimeraX](https://www.cgl.ucsf.edu/chimerax/) compatible scripts\n", + "\n", + "Both modules can be used to represent the results at the:\n", + "1. Per feature/interaction level. (Cylinders are drawn between each feature, with the cylinder radii marking how strong the relative difference is.) \n", "2. Per residue level. The Carbon alpha of each residue will be depicted as a sphere, with the sphere radii depicting how strong the the relative difference is. 
" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3.1 PyMOL Projections" + ] + }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The file: tutorial_datasets\\PTP1B_stat_analysis\\jensen_shannon_Pymol_Per_Feature_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_stat_analysis\\mutual_information_Pymol_Per_Feature_Scores.py was written to disk.\n" + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/jensen_shannon_Pymol_Per_Feature_Scores.py was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/mutual_information_Pymol_Per_Feature_Scores.py was written to disk.\n" ] } ], @@ -1585,15 +1605,15 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The file: tutorial_datasets\\PTP1B_stat_analysis\\jensen_shannon_Pymol_Per_Res_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_stat_analysis\\mutual_information_Pymol_Per_Res_Scores.py was written to disk.\n" + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/jensen_shannon_Pymol_Per_Res_Scores.py was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/mutual_information_Pymol_Per_Res_Scores.py was written to disk.\n" ] } ], @@ -1613,12 +1633,109 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1.3.2 ChimeraX Projections" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.py:331: UserWarning:\n", + "\n", + "Element information is missing, elements attribute will not be populated. 
If needed these can be guessed using MDAnalysis.topology.guessers.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/jensen_shannon_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.py:331: UserWarning:\n", + "\n", + "Element information is missing, elements attribute will not be populated. If needed these can be guessed using MDAnalysis.topology.guessers.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/mutual_information_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + } + ], + "source": [ + "# Write ChimeraX compatible scripts for the per feature results.\n", + "# Simply swap between the two statistical methods as shown below. 
\n", + "chimerax_projections.project_chimerax_top_features(\n", + " per_feature_scores=stat_model.js_distances,\n", + " model_name=\"jensen_shannon\",\n", + " pdb_file=pdb_file,\n", + " numb_features=125, # can be any integer values or \"all\" if you would like all features returned.\n", + " out_dir=stats_out_dir\n", + ")\n", + "\n", + "chimerax_projections.project_chimerax_top_features(\n", + " per_feature_scores=stat_model.mutual_infos,\n", + " model_name=\"mutual_information\",\n", + " pdb_file=pdb_file,\n", + " numb_features=125, # can be any integer values or \"all\" if you would like all features returned.\n", + " out_dir=stats_out_dir\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/jensen_shannon_ChimeraX_Per_Res_Scores.cxc was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_stat_analysis/mutual_information_ChimeraX_Per_Res_Scores.cxc was written to disk.\n" + ] + } + ], + "source": [ + "# Write ChimeraX compatible scripts for the per residue results.\n", + "# Simply swap between the two statistical methods as shown below. \n", + "chimerax_projections.project_chimerax_per_res_scores(\n", + " per_res_scores=js_per_res_scores,\n", + " model_name=\"jensen_shannon\",\n", + " out_dir=stats_out_dir\n", + ")\n", + "\n", + "chimerax_projections.project_chimerax_per_res_scores(\n", + " per_res_scores=mi_per_res_scores,\n", + " model_name=\"mutual_information\",\n", + " out_dir=stats_out_dir\n", + ")" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Heres an example of the figures you can make , see the manuscript for more examples and what the figures represent.\n", + "Here's an example of the types of figures you can make, see the manuscript for more examples and what the figures represent.\n", "\n", "
\"Drawing\"
" ] @@ -1647,7 +1764,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -1811,7 +1928,7 @@ "[3 rows x 360 columns]" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -1832,7 +1949,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1891,16 +2008,16 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Model saved to disk at: temporary_files\\CatBoost_Model.pickle\n", - "Model saved to disk at: temporary_files\\XGBoost_Model.pickle\n", - "Model saved to disk at: temporary_files\\Random_Forest_Model.pickle\n", + "Model saved to disk at: temporary_files/CatBoost_Model.pickle\n", + "Model saved to disk at: temporary_files/XGBoost_Model.pickle\n", + "Model saved to disk at: temporary_files/Random_Forest_Model.pickle\n", "Model building complete, returning final results with train/test datasets to you.\n" ] }, @@ -1937,25 +2054,25 @@ " 0\n", " CatBoost\n", " {'iterations': 100}\n", - " 0.983748\n", - " 0.002514\n", - " 1.42\n", + " 0.983829\n", + " 0.002235\n", + " 0.45\n", " \n", " \n", " 1\n", " XGBoost\n", " {'n_estimators': 100}\n", - " 0.983179\n", - " 0.003532\n", - " 1.95\n", + " 0.982407\n", + " 0.002226\n", + " 0.50\n", " \n", " \n", " 2\n", " Random_Forest\n", " {'max_depth': 100, 'n_estimators': 100}\n", - " 0.971153\n", - " 0.006106\n", - " 2.13\n", + " 0.970990\n", + " 0.003206\n", + " 2.02\n", " \n", " \n", "\n", @@ -1963,17 +2080,17 @@ ], "text/plain": [ " model best_params best_score \\\n", - "0 CatBoost {'iterations': 100} 0.983748 \n", - "1 XGBoost {'n_estimators': 100} 0.983179 \n", - "2 Random_Forest {'max_depth': 100, 'n_estimators': 100} 0.971153 \n", + "0 CatBoost {'iterations': 100} 0.983829 \n", + "1 XGBoost {'n_estimators': 100} 0.982407 \n", + "2 
Random_Forest {'max_depth': 100, 'n_estimators': 100} 0.970990 \n", "\n", " best_standard_deviation Time taken to build model (minutes) \n", - "0 0.002514 1.42 \n", - "1 0.003532 1.95 \n", - "2 0.006106 2.13 " + "0 0.002235 0.45 \n", + "1 0.002226 0.50 \n", + "2 0.003206 2.02 " ] }, - "execution_count": 33, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1996,7 +2113,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -2013,7 +2130,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -2046,37 +2163,37 @@ " \n", " \n", " Closed\n", - " 0.975124\n", - " 0.989899\n", - " 0.982456\n", - " 792.0\n", + " 0.985751\n", + " 0.989597\n", + " 0.98767\n", + " 769.0\n", " \n", " \n", " Open\n", - " 0.987578\n", - " 0.969512\n", - " 0.978462\n", - " 656.0\n", + " 0.988166\n", + " 0.9838\n", + " 0.985978\n", + " 679.0\n", " \n", " \n", " accuracy\n", " N/A\n", " N/A\n", - " 0.980663\n", - " 1448\n", + " 0.986878\n", + " 1448.0\n", " \n", " \n", " macro avg\n", - " 0.981351\n", - " 0.979706\n", - " 0.980459\n", + " 0.986958\n", + " 0.986698\n", + " 0.986824\n", " 1448.0\n", " \n", " \n", " weighted avg\n", - " 0.980766\n", - " 0.980663\n", - " 0.980646\n", + " 0.986883\n", + " 0.986878\n", + " 0.986877\n", " 1448.0\n", " \n", " \n", @@ -2085,14 +2202,14 @@ ], "text/plain": [ " precision recall f1-score support\n", - "Closed 0.975124 0.989899 0.982456 792.0\n", - "Open 0.987578 0.969512 0.978462 656.0\n", - "accuracy N/A N/A 0.980663 1448\n", - "macro avg 0.981351 0.979706 0.980459 1448.0\n", - "weighted avg 0.980766 0.980663 0.980646 1448.0" + "Closed 0.985751 0.989597 0.98767 769.0\n", + "Open 0.988166 0.9838 0.985978 679.0\n", + "accuracy N/A N/A 0.986878 1448.0\n", + "macro avg 0.986958 0.986698 0.986824 1448.0\n", + "weighted avg 0.986883 0.986878 0.986877 1448.0" ] }, - "execution_count": 35, + "execution_count": 34, 
"metadata": {}, "output_type": "execute_result" } @@ -2103,7 +2220,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -2136,37 +2253,37 @@ " \n", " \n", " Closed\n", - " 0.979975\n", - " 0.988636\n", - " 0.984287\n", - " 792.0\n", + " 0.987047\n", + " 0.990897\n", + " 0.988968\n", + " 769.0\n", " \n", " \n", " Open\n", - " 0.986133\n", - " 0.97561\n", - " 0.980843\n", - " 656.0\n", + " 0.989645\n", + " 0.985272\n", + " 0.987454\n", + " 679.0\n", " \n", " \n", " accuracy\n", " N/A\n", " N/A\n", - " 0.982735\n", - " 1448\n", + " 0.98826\n", + " 1448.0\n", " \n", " \n", " macro avg\n", - " 0.983054\n", - " 0.982123\n", - " 0.982565\n", + " 0.988346\n", + " 0.988085\n", + " 0.988211\n", " 1448.0\n", " \n", " \n", " weighted avg\n", - " 0.982765\n", - " 0.982735\n", - " 0.982726\n", + " 0.988265\n", + " 0.98826\n", + " 0.988258\n", " 1448.0\n", " \n", " \n", @@ -2175,14 +2292,14 @@ ], "text/plain": [ " precision recall f1-score support\n", - "Closed 0.979975 0.988636 0.984287 792.0\n", - "Open 0.986133 0.97561 0.980843 656.0\n", - "accuracy N/A N/A 0.982735 1448\n", - "macro avg 0.983054 0.982123 0.982565 1448.0\n", - "weighted avg 0.982765 0.982735 0.982726 1448.0" + "Closed 0.987047 0.990897 0.988968 769.0\n", + "Open 0.989645 0.985272 0.987454 679.0\n", + "accuracy N/A N/A 0.98826 1448.0\n", + "macro avg 0.988346 0.988085 0.988211 1448.0\n", + "weighted avg 0.988265 0.98826 0.988258 1448.0" ] }, - "execution_count": 36, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2193,7 +2310,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -2226,37 +2343,37 @@ " \n", " \n", " Closed\n", - " 0.961586\n", - " 0.979798\n", - " 0.970607\n", - " 792.0\n", + " 0.975293\n", + " 0.975293\n", + " 0.975293\n", + " 769.0\n", " \n", " \n", " Open\n", - " 0.975039\n", - " 0.952744\n", - " 0.963763\n", - " 656.0\n", + " 0.972018\n", + 
" 0.972018\n", + " 0.972018\n", + " 679.0\n", " \n", " \n", " accuracy\n", " N/A\n", " N/A\n", - " 0.967541\n", - " 1448\n", + " 0.973757\n", + " 1448.0\n", " \n", " \n", " macro avg\n", - " 0.968313\n", - " 0.966271\n", - " 0.967185\n", + " 0.973655\n", + " 0.973655\n", + " 0.973655\n", " 1448.0\n", " \n", " \n", " weighted avg\n", - " 0.967681\n", - " 0.967541\n", - " 0.967506\n", + " 0.973757\n", + " 0.973757\n", + " 0.973757\n", " 1448.0\n", " \n", " \n", @@ -2265,14 +2382,14 @@ ], "text/plain": [ " precision recall f1-score support\n", - "Closed 0.961586 0.979798 0.970607 792.0\n", - "Open 0.975039 0.952744 0.963763 656.0\n", - "accuracy N/A N/A 0.967541 1448\n", - "macro avg 0.968313 0.966271 0.967185 1448.0\n", - "weighted avg 0.967681 0.967541 0.967506 1448.0" + "Closed 0.975293 0.975293 0.975293 769.0\n", + "Open 0.972018 0.972018 0.972018 679.0\n", + "accuracy N/A N/A 0.973757 1448.0\n", + "macro avg 0.973655 0.973655 0.973655 1448.0\n", + "weighted avg 0.973757 0.973757 0.973757 1448.0" ] }, - "execution_count": 37, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -2295,7 +2412,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -2304,17 +2421,17 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[784, 8],\n", - " [ 20, 636]], dtype=int64)" + "array([[761, 8],\n", + " [ 11, 668]])" ] }, - "execution_count": 39, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -2333,13 +2450,13 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ - "ClosedOpenOpen Closed Confusion Matrix Obtained for XGBoost using the Validation/Holdout set784820636" + "ClosedOpenOpen Closed Confusion Matrix Obtained for XGBoost using the Validation/Holdout set761811668" ] }, 
"metadata": {}, @@ -2368,7 +2485,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -2386,16 +2503,16 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\PTP1B_ml_analysis\\CatBoost_Feature_Scores.csv written to disk.\n", - "tutorial_datasets\\PTP1B_ml_analysis\\XGBoost_Feature_Scores.csv written to disk.\n", - "tutorial_datasets\\PTP1B_ml_analysis\\Random_Forest_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/CatBoost_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/XGBoost_Feature_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/Random_Forest_Feature_Scores.csv written to disk.\n", "All per feature scores have now been saved to disk.\n" ] } @@ -2407,16 +2524,16 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tutorial_datasets\\PTP1B_ml_analysis\\CatBoost_Per_Residue_Scores.csv written to disk.\n", - "tutorial_datasets\\PTP1B_ml_analysis\\XGBoost_Per_Residue_Scores.csv written to disk.\n", - "tutorial_datasets\\PTP1B_ml_analysis\\Random_Forest_Per_Residue_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/CatBoost_Per_Residue_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/XGBoost_Per_Residue_Scores.csv written to disk.\n", + "tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/Random_Forest_Per_Residue_Scores.csv written to disk.\n", "All per residue scores have now been saved to disk.\n" ] } @@ -2428,7 +2545,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -2455,7 +2572,7 @@ }, { 
"cell_type": "code", - "execution_count": 45, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -2465,7 +2582,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -2497,39 +2614,39 @@ " \n", " \n", " \n", - " 194\n", + " 205\n", " 2\n", " 0.00000\n", " 0.00000\n", " 0.00000\n", " \n", " \n", - " 18\n", + " 15\n", " 3\n", - " 0.07506\n", - " 0.04290\n", - " 0.00966\n", + " 0.08886\n", + " 0.03822\n", + " 0.00860\n", " \n", " \n", - " 84\n", + " 86\n", " 4\n", - " 0.01093\n", - " 0.04671\n", - " 0.01033\n", + " 0.00891\n", + " 0.03985\n", + " 0.01029\n", " \n", " \n", - " 166\n", + " 150\n", " 5\n", - " 0.00085\n", - " 0.02903\n", - " 0.00762\n", + " 0.00167\n", + " 0.00000\n", + " 0.00823\n", " \n", " \n", - " 167\n", + " 176\n", " 6\n", - " 0.00085\n", - " 0.00500\n", - " 0.01097\n", + " 0.00075\n", + " 0.00456\n", + " 0.01100\n", " \n", " \n", "\n", @@ -2537,14 +2654,14 @@ ], "text/plain": [ " Residue Number CatBoost XGBoost Random_Forest\n", - "194 2 0.00000 0.00000 0.00000\n", - "18 3 0.07506 0.04290 0.00966\n", - "84 4 0.01093 0.04671 0.01033\n", - "166 5 0.00085 0.02903 0.00762\n", - "167 6 0.00085 0.00500 0.01097" + "205 2 0.00000 0.00000 0.00000\n", + "15 3 0.08886 0.03822 0.00860\n", + "86 4 0.00891 0.03985 0.01029\n", + "150 5 0.00167 0.00000 0.00823\n", + "176 6 0.00075 0.00456 0.01100" ] }, - "execution_count": 46, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -2560,13 +2677,13 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ - "5010015020025000.20.40.60.81ML ModelsCatBoostXGBoostRandom_ForestPer Residue Scores for All 3 Machine Learning ModelsResidue NumberPer Residue Score" + "5010015020025000.20.40.60.81ML ModelsCatBoostXGBoostRandom_ForestPer Residue Scores for All 3 Machine Learning ModelsResidue NumberPer Residue Score" ] }, "metadata": {}, @@ 
-2591,31 +2708,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Part 2.3. Project the Results onto Protein Structures with the pymol_projections.py module. \n", - " \n", + "### Part 2.3. Project the Results onto Protein Structures \n", + "\n", "This section is essentially identical to 1.3, only that now we will output the ml results instead of the stats results" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2.3.1 - For projecting the results with PyMOL \n", + "Here you do not need to specify which model you would like to output results for; results for all models will be output simultaneously." + ] + }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The file: tutorial_datasets\\PTP1B_ml_analysis\\CatBoost_Pymol_Per_Res_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_ml_analysis\\XGBoost_Pymol_Per_Res_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_ml_analysis\\Random_Forest_Pymol_Per_Res_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_ml_analysis\\CatBoost_Pymol_Per_Feature_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_ml_analysis\\XGBoost_Pymol_Per_Feature_Scores.py was written to disk.\n", - "The file: tutorial_datasets\\PTP1B_ml_analysis\\Random_Forest_Pymol_Per_Feature_Scores.py was written to disk.\n" + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/CatBoost_Pymol_Per_Res_Scores.py was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/XGBoost_Pymol_Per_Res_Scores.py was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/Random_Forest_Pymol_Per_Res_Scores.py was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/CatBoost_Pymol_Per_Feature_Scores.py was written to disk.\n", + "The file: 
tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/XGBoost_Pymol_Per_Feature_Scores.py was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/Random_Forest_Pymol_Per_Feature_Scores.py was written to disk.\n" ] } ], "source": [ - "# Here you do not need to specify what model you would like to the output results for, all will be outputted simultaneously.\n", "pymol_projections.project_multiple_per_res_scores(\n", " all_per_res_scores=ml_post_proc.all_per_residue_scores,\n", " out_dir=ml_out_dir\n", @@ -2632,7 +2756,90 @@ "attachments": {}, "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [ + "#### 2.3.2 - For projecting the results with ChimeraX\n", + "Here you do not need to specify which model you would like to output results for; results for all models will be output simultaneously." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/CatBoost_ChimeraX_Per_Res_Scores.cxc was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/XGBoost_ChimeraX_Per_Res_Scores.cxc was written to disk.\n", + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/Random_Forest_ChimeraX_Per_Res_Scores.cxc was written to disk.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.py:331: UserWarning:\n", + "\n", + "Element information is missing, elements attribute will not be populated. 
If needed these can be guessed using MDAnalysis.topology.guessers.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/CatBoost_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.py:331: UserWarning:\n", + "\n", + "Element information is missing, elements attribute will not be populated. If needed these can be guessed using MDAnalysis.topology.guessers.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/XGBoost_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/roryc/miniconda3/envs/kif_py_3_11/lib/python3.11/site-packages/MDAnalysis/topology/PDBParser.py:331: UserWarning:\n", + "\n", + "Element information is missing, elements attribute will not be populated. 
If needed these can be guessed using MDAnalysis.topology.guessers.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file: tutorial_datasets/PTP1B_Tutorial/PTP1B_ml_analysis/Random_Forest_ChimeraX_Per_Feature_Scores.cxc was written to disk.\n" + ] + } + ], + "source": [ + "chimerax_projections.project_multiple_per_res_scores(\n", + " all_per_res_scores=ml_post_proc.all_per_residue_scores,\n", + " out_dir=ml_out_dir\n", + ")\n", + "\n", + "chimerax_projections.project_multiple_per_feature_scores(\n", + " all_per_feature_scores=ml_post_proc.all_per_feature_scores,\n", + " pdb_file=pdb_file,\n", + " numb_features=\"all\",\n", + " out_dir=ml_out_dir\n", + ")" + ] } ], "metadata": { @@ -2651,7 +2858,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.11.9" }, "orig_nbformat": 4, "vscode": {