Skip to content

Commit

Permalink
Bugfixes (#153)
Browse files Browse the repository at this point in the history
* fix in docs of PrecomputedKNN

* requirements: do not allow numpy 1.24

* made tqdm a DeepRC-only dependency and removed the 'requests' requirement

* moved h5py (HDF5 support) to the optional DeepRC dependencies, and noted in DesignMatrixExporter that it must be installed for hdf5 to be used as an export format

* remove hdf5 from tests as it is not automatically installed as a dependency

* move notes to Trello and remove commented code

---------

Co-authored-by: pavlovicmilena <[email protected]>
  • Loading branch information
LonnekeScheffer and pavlovicmilena authored Mar 30, 2023
1 parent 6c44f71 commit 551dec4
Show file tree
Hide file tree
Showing 9 changed files with 21 additions and 28 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,26 +164,25 @@ If you would like to make contributions, for example by adding a new ML method,
- [editdistance](https://pypi.org/project/editdistance/) (0.5.3 or higher)
- [fishersapi](https://pypi.org/project/fishersapi/)
- [gensim](https://pypi.org/project/gensim/) (3.8 or higher, < 4)
- [h5py](https://www.h5py.org/) (2.10.0 or lower when using the optional DeepRC dependency)
- [logomaker](https://pypi.org/project/logomaker/) (0.8 or higher)
- [matplotlib](https://matplotlib.org) (3.1 or higher)
- [matplotlib-venn](https://pypi.org/project/matplotlib-venn/) (0.11 or higher)
- [numpy](https://www.numpy.org/) (1.18 or higher)
- [numpy](https://www.numpy.org/) (1.18 or higher, but at most 1.23.5)
- [pandas](https://pandas.pydata.org/) (1 or higher)
- [plotly](https://plotly.com/python/) (4 or higher)
- [pystache](https://pypi.org/project/pystache/) (0.5.4)
- [PyTorch](https://pytorch.org/) (1.5.1 or higher)
- [PyYAML](https://pyyaml.org) (5.3 or higher)
- [regex](https://pypi.org/project/regex/)
- [requests](https://requests.readthedocs.io/) (2.21 or higher)
- [scikit-learn](https://scikit-learn.org/) (0.23 or higher)
- [scipy](https://www.scipy.org)
- [tensorboard](https://www.tensorflow.org/tensorboard) (1.14.0 or higher)
- [tqdm](https://tqdm.github.io/) (0.24 or higher)
- [tzlocal](https://pypi.org/project/tzlocal/)
- Optional dependencies when using DeepRC:
- [DeepRC](https://github.com/ml-jku/DeepRC) (0.0.1)
- [widis-lstm-tools](https://github.com/widmi/widis-lstm-tools) (0.4)
- [tqdm](https://tqdm.github.io/) (0.24 or higher)
- [h5py](https://www.h5py.org/) (2.10.0 or lower when using DeepRC 0.0.1)
- Optional dependencies when using TCRdist:
- [parasail](https://pypi.org/project/parasail/) (1.2)
- [tcrdist3](https://github.com/kmayerb/tcrdist3) (0.1.6 or higher)
Expand Down
6 changes: 4 additions & 2 deletions immuneML/ml_methods/DeepRC.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@
import warnings
from pathlib import Path

import h5py
import numpy as np
import pkg_resources
import torch
import yaml
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import train_test_split
from tqdm import tqdm

from immuneML.caching.CacheHandler import CacheHandler
from immuneML.data_model.encoded_data.EncodedData import EncodedData
Expand Down Expand Up @@ -168,6 +166,8 @@ def _metadata_to_hdf5(self, metadata_filepath: Path, label_name: str):
return hdf5_filepath

def _load_dataset_in_ram(self, hdf5_filepath: Path):
import h5py

with h5py.File(str(hdf5_filepath), 'r') as hf:
pre_loaded_hdf5_file = dict()
pre_loaded_hdf5_file['seq_lens'] = hf['sampledata']['seq_lens'][:]
Expand Down Expand Up @@ -329,6 +329,8 @@ def predict_proba(self, encoded_data: EncodedData, label: Label):

def _model_predict(self, model, dataloader):
"""Based on the DeepRC function evaluate (deeprc.deeprc_binary.training.evaluate)"""
from tqdm import tqdm

with torch.no_grad():
model.to(device=self.pytorch_device)
scoring_predictions = []
Expand Down
4 changes: 2 additions & 2 deletions immuneML/ml_methods/PrecomputedKNN.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PrecomputedKNN(SklearnMethod):
.. code-block:: yaml
my_knn_method:
KNN:
PrecomputedKNN:
# sklearn parameters (same names as in original sklearn class)
weights: uniform # always use this setting for weights
n_neighbors: [5, 10, 15] # find the optimal number of neighbors
Expand All @@ -35,7 +35,7 @@ class PrecomputedKNN(SklearnMethod):
model_selection_cv: True
model_selection_n_folds: 5
# alternative way to define ML method with default values:
my_default_knn: KNN
my_default_knn: PrecomputedKNN
"""

Expand Down
5 changes: 3 additions & 2 deletions immuneML/reports/encoding_reports/DesignMatrixExporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from dataclasses import dataclass
from pathlib import Path

import h5py
import numpy as np
import pandas as pd
import yaml
Expand All @@ -29,6 +28,7 @@ class DesignMatrixExporter(EncodingReport):
file_format (str): the format and extension of the file to store the design matrix. The supported formats are:
npy, csv, hdf5, npy.zip, csv.zip or hdf5.zip.
Note: when using the hdf5 or hdf5.zip output formats, make sure the 'h5py' package is installed (it is not installed automatically as a dependency).
YAML specification:
Expand Down Expand Up @@ -72,7 +72,8 @@ def _export_matrix(self) -> ReportOutput:
file_path = file_path.with_suffix('.' + ext)

# Use h5py to create a hdf5 file.
if ext == "hdf5":
if ext == "hdf5":
import h5py
with h5py.File(str(file_path), 'w') as hf_object:
hf_object.create_dataset(str(file_path), data=data)
# Use numpy to create a csv or npy file.
Expand Down
3 changes: 2 additions & 1 deletion immuneML/reports/ml_reports/DeepRCMotifDiscovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import torch
from matplotlib import pyplot as plt
from tqdm import tqdm

from immuneML.data_model.dataset.Dataset import Dataset
from immuneML.hyperparameter_optimization.HPSetting import HPSetting
Expand Down Expand Up @@ -129,6 +128,8 @@ def compute_contributions(self, intgrds_set_loader: torch.utils.data.DataLoader,
path_kernels : Path
path for kernels integrated gradients plot
"""
from tqdm import tqdm

intgrds_set = intgrds_set_loader.dataset

#
Expand Down
5 changes: 1 addition & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
numpy>=1.18.5
numpy>=1.18.5,<=1.23.5
pytest>=4
pandas>=1
PyYAML>=5.3
Expand All @@ -12,11 +12,8 @@ airr>=1,<1.4
fishersapi
pystache
torch>=1.5.1
h5py<=2.10.0
dill>=0.3
tqdm>=0.24
tensorboard>=1.14.0
requests>=2.21
plotly>=4
logomaker>=0.8
matplotlib-venn>=0.11
Expand Down
4 changes: 3 additions & 1 deletion requirements_DeepRC.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
git+https://github.com/ml-jku/DeepRC@fec4b4f4b2cd70e00e8de83da169560dec73a419
git+https://github.com/widmi/widis-lstm-tools@d94edfc854477cca6f492d4c0abc37c7fd70667b
git+https://github.com/widmi/widis-lstm-tools@d94edfc854477cca6f492d4c0abc37c7fd70667b
tqdm>=0.24
h5py<=2.10.0
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ def import_requirements(filename) -> list:
author="immuneML Team",
author_email="[email protected]",
url="https://github.com/uio-bmi/immuneML",
install_requires=["numpy>=1.18.5", "pytest>=4", "pandas>=1", "PyYAML>=5.3", "scikit-learn>=0.23", "gensim>=3.8,<4", "matplotlib>=3.1",
"editdistance==0.5.3", "regex", "tzlocal", "airr>=1,<1.4", "pystache", "torch>=1.5.1", "Cython", "h5py<=2.10.0", "dill>=0.3",
"tqdm>=0.24", # Note: h5py v3 does not work with DeepRC, but works with everything else
"tensorboard>=1.14.0", "requests>=2.21", "plotly>=4", "logomaker>=0.8", "fishersapi", "matplotlib-venn>=0.11", "scipy"],
install_requires=["numpy>=1.18.5,<=1.23.5", "pytest>=4", "pandas>=1", "PyYAML>=5.3", "scikit-learn>=0.23", "gensim>=3.8,<4", "matplotlib>=3.1",
"editdistance==0.5.3", "regex", "tzlocal", "airr>=1,<1.4", "fishersapi", "pystache", "torch>=1.5.1", "dill>=0.3",
"tensorboard>=1.14.0", "plotly>=4", "logomaker>=0.8", "matplotlib-venn>=0.11", "scipy", "Cython"],
extras_require={
"TCRdist": ["parasail==1.2", "tcrdist3>=0.1.6"]
},
Expand Down
8 changes: 0 additions & 8 deletions test/reports/encoding_reports/test_DesignMatrixExporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,5 @@ def test_exporter(self):
report._export_matrix()
self.assertTrue(os.path.isfile(path / "design_matrix.npy.zip"))

report.file_format = 'hdf5'
report._export_matrix()
self.assertTrue(os.path.isfile(path / "design_matrix.hdf5"))
report.file_format = 'hdf5.zip'
report._export_matrix()
self.assertTrue(os.path.isfile(path / "design_matrix.hdf5.zip"))
shutil.rmtree(path)

with self.assertRaises(AssertionError):
DesignMatrixExporter.build_object(**{'file_format': "random"})

0 comments on commit 551dec4

Please sign in to comment.