diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 000000000..576fa10c1 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,41 @@ +# This workflow will install the ml4h Python package and run its tests. +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Test ml4h Python package + +on: + workflow_dispatch: + # Allows manually triggering workflow in GitHub UI on selected branch. + # GitHub doc: https://docs.github.com/en/free-pro-team@latest/actions/reference/events-that-trigger-workflows#workflow_dispatch. + # GitHub blog demo: https://github.blog/changelog/2020-07-06-github-actions-manual-triggers-with-workflow_dispatch/. + + push: + branches: [ master ] + + pull_request: + branches: [ master ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.6, 3.7] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + # Install the ml4h Python package. + pip install . + - name: Test with pytest + run: | + pytest tests -m "not slow" + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2e2006c07..929e5a930 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,10 +1,10 @@ # Contributing -1. Before making a substantial pull request, consider first [filing an issue](https://github.com/broadinstitute/ml/issues) describing the feature addition or change you wish to make. +1. Before making a substantial pull request, consider first [filing an issue](https://github.com/broadinstitute/ml4h/issues) describing the feature addition or change you wish to make. 1. [Get setup](#setup-for-code-contributions) 1. [Follow the coding style](#python-coding-style) 1. [Test your code](#testing) -1. Send a [pull request](https://github.com/broadinstitute/ml/pulls) +1. Send a [pull request](https://github.com/broadinstitute/ml4h/pulls) ## Setup for code contributions @@ -13,7 +13,7 @@ Small typos in code or documentation may be edited directly using the GitHub web interface. Otherwise: 1. If you are new to GitHub, don't start here. Instead, work through a GitHub tutorial such as https://guides.github.com/activities/hello-world/. -1. Create a fork of https://github.com/broadinstitute/ml +1. Create a fork of https://github.com/broadinstitute/ml4h 1. Clone your fork. 1. Work from a feature branch. See the [Appendix](#appendix) for detailed `git` commands. @@ -25,11 +25,11 @@ Small typos in code or documentation may be edited directly using the GitHub web # Install pre-commit pip3 install pre-commit # Install the git hook scripts by running this within the git clone directory -cd ${HOME}/ml +cd ${HOME}/ml4h pre-commit install ``` -See [.pre-commit-config.yaml](https://github.com/broadinstitute/ml/blob/master/.pre-commit-config.yaml) for the currently configured pre-commit hooks for ml4cvd. +See [.pre-commit-config.yaml](https://github.com/broadinstitute/ml4h/blob/master/.pre-commit-config.yaml) for the currently configured pre-commit hooks for ml4h. ### Install git-secrets @@ -50,11 +50,11 @@ git config --global init.templateDir ~/.git-templates/git-secrets We maintain our own custom "provider" to cover any private keys or other critical data that we would like to avoid committing to our repositories. Feel free to add ```egrep```-compatible regular expressions to -```git_secrets_provider_ml4cvd.txt``` to match types of critical data that are not currently covered by the patterns in that +```git_secrets_provider_ml4h.txt``` to match types of critical data that are not currently covered by the patterns in that file. To register the patterns in this file with ```git-secrets```: ``` -git secrets --add-provider -- cat ${HOME}/ml/git_secrets_provider_ml4cvd.txt +git secrets --add-provider -- cat ${HOME}/ml4h/git_secrets_provider_ml4h.txt ``` ### Install pylint @@ -66,16 +66,16 @@ git secrets --add-provider -- cat ${HOME}/ml/git_secrets_provider_ml4cvd.txt pip3 install pylint ``` -See [pylintrc](https://github.com/broadinstitute/ml/blob/master/pylintrc) for the current lint configuration for ml4cvd. +See [pylintrc](https://github.com/broadinstitute/ml4h/blob/master/pylintrc) for the current lint configuration for ml4h. # Python coding style -Changes to ml4cvd should conform to [PEP 8 -- Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/). See also [Google Python Style Guide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md) as another decription of this coding style. +Changes to ml4h should conform to [PEP 8 -- Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/). See also [Google Python Style Guide](https://github.com/google/styleguide/blob/gh-pages/pyguide.md) as another decription of this coding style. Use `pylint` to check your Python changes: ```bash -pylint --rcfile=${HOME}/ml/pylintrc myfile.py +pylint --rcfile=${HOME}/ml4h/pylintrc myfile.py ``` Any messages returned by `pylint` are intended to be self-explanatory, but that isn't always the case. @@ -89,32 +89,32 @@ Any messages returned by `pylint` are intended to be self-explanatory, but that Unit tests can be run in Docker with ``` -${HOME}/ml/scripts/tf.sh -T ${HOME}/ml/tests +${HOME}/ml4h/scripts/tf.sh -T ${HOME}/ml4h/tests ``` Unit tests can be run locally in a conda environment with ``` -python -m pytest ${HOME}/ml/tests +python -m pytest ${HOME}/ml4h/tests ``` Some of the unit tests are slow due to creating, saving and loading `tensorflow` models. To skip those tests to move quickly, run ``` -python -m pytest ${HOME}/ml/tests -m "not slow" +python -m pytest ${HOME}/ml4h/tests -m "not slow" ``` pytest can also run specific tests using `::`. For example ``` -python -m pytest ${HOME}/ml/tests/test_models.py::TestMakeMultimodalMultitaskModel::test_u_connect_segment +python -m pytest ${HOME}/ml4h/tests/test_models.py::TestMakeMultimodalMultitaskModel::test_u_connect_segment ``` For more pytest usage information, checkout the [usage guide](https://docs.pytest.org/en/latest/usage.html). ## Testing of `visualization_tools` -The code in [ml4cvd/visualization_tools](https://github.com/broadinstitute/ml/tree/master/ml4cvd/visualization_tools) is primarily interactive so we add test cases to notebook [test_error_handling_for_notebook_visualizations.ipynb](https://github.com/broadinstitute/ml/blob/master/notebooks/review_results/test_error_handling_for_notebook_visualizations.ipynb) and visually inspect the output of `Cells -> Run all`. +The code in [ml4h/visualization_tools](https://github.com/broadinstitute/ml4h/tree/master/ml4h/visualization_tools) is primarily interactive so we add test cases to notebook [test_error_handling_for_notebook_visualizations.ipynb](https://github.com/broadinstitute/ml4h/blob/master/notebooks/review_results/test_error_handling_for_notebook_visualizations.ipynb) and visually inspect the output of `Cells -> Run all`. # Appendix -For the ml4cvd GitHub repository, we are doing ‘merge and squash’ of pull requests. So that means your fork does not match upstream after your pull request has been merged. The easiest way to manage this is to always work in a feature branch, instead of checking changes into your fork’s master branch. +For the ml4h GitHub repository, we are doing ‘merge and squash’ of pull requests. So that means your fork does not match upstream after your pull request has been merged. The easiest way to manage this is to always work in a feature branch, instead of checking changes into your fork’s master branch. ## How to work on a new feature @@ -128,7 +128,7 @@ git fetch upstream Note: If you get an error saying that upstream is unknown, run the following remote add command and then re-run the fetch command. You only need to do this once per git clone. ``` -git remote add upstream https://github.com/broadinstitute/ml.git +git remote add upstream https://github.com/broadinstitute/ml4h.git ``` (2) Make sure your master branch is “even” with upstream. diff --git a/docker/vm_boot_images/config/tensorflow-requirements.txt b/docker/vm_boot_images/config/tensorflow-requirements.txt index d782967af..880b7ee0a 100644 --- a/docker/vm_boot_images/config/tensorflow-requirements.txt +++ b/docker/vm_boot_images/config/tensorflow-requirements.txt @@ -1,3 +1,4 @@ +tensorflow==2.1.0 h5py==2.10.0 pydot==1.2.4 nibabel==2.5.0 diff --git a/ml4h/arguments.py b/ml4h/arguments.py index f00c6f9f9..051d08d69 100644 --- a/ml4h/arguments.py +++ b/ml4h/arguments.py @@ -29,7 +29,7 @@ from ml4h.models import NORMALIZATION_CLASSES, CONV_REGULARIZATION_CLASSES, DENSE_REGULARIZATION_CLASSES from ml4h.tensormap.mgb.dynamic import make_mgb_dynamic_tensor_maps from ml4h.defines import IMPUTATION_RANDOM, IMPUTATION_MEAN -from ml4h.tensormap.tensor_map_maker import generate_continuous_tensor_map_from_file, generate_random_text_tensor_maps +from ml4h.tensormap.tensor_map_maker import generate_continuous_tensor_map_from_file, generate_random_text_tensor_maps, make_test_tensor_maps BOTTLENECK_STR_TO_ENUM = { @@ -340,6 +340,14 @@ def tensormap_lookup(module_string: str, prefix: str = "ml4h.tensormap"): if isinstance(tm, TensorMap) == True: return tm + tm = _build_mgb_time_series_tensor_maps(module_string) + if isinstance(tm, TensorMap) == True: + return tm + + tm = make_test_tensor_maps(module_string) + if isinstance(tm, TensorMap) == True: + return tm + if isinstance(module_string, str) == False: raise TypeError(f"Input name must be a string. Given: {type(module_string)}") if len(module_string) == 0: diff --git a/ml4h/tensormap/tensor_map_maker.py b/ml4h/tensormap/tensor_map_maker.py index 154212544..f2090154d 100644 --- a/ml4h/tensormap/tensor_map_maker.py +++ b/ml4h/tensormap/tensor_map_maker.py @@ -18,6 +18,15 @@ LESS_THAN_CODES = "('Less than a year', 'Less than once a week', 'Less than one mile', 'Less than an hour a day', 'Less than one a day', 'Less than one', 'Less than once a year', 'Less than 1 year ago', 'Less than a year ago', 'Less than one year', 'Less than one cigarette per day')" +def make_test_tensor_maps(desired_map_name: str) -> TensorMap: + for n in range(1, 6): + if desired_map_name == f'{n}d_cont': + return TensorMap(f'{n}d_cont', shape=tuple(range(2, n + 2)), interpretation=Interpretation.CONTINUOUS) + if desired_map_name == f'{n}d_cat': + return TensorMap(f'{n}d_cat', shape=tuple(range(2, n + 2)), interpretation=Interpretation.CATEGORICAL, + channel_map={f'c_{i}': i for i in range(n + 1)},) + + def write_tensor_maps(args) -> None: logging.info("Making tensor maps...") @@ -258,3 +267,4 @@ def generate_random_text_tensor_maps(text_file: str, window_size: int, one_hot: cacheable=False, ) return input_map, burn_in, output_map + diff --git a/setup.py b/setup.py index a6f9b39b7..319b741c6 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,16 @@ +import pathlib from setuptools import setup, find_packages +here = pathlib.Path(__file__).parent.resolve() +# Get the requirements from the requirements file +requirements = (here / 'docker/vm_boot_images/config/tensorflow-requirements.txt').read_text(encoding='utf-8') + setup( name='ml4h', version='0.0.1', description='Machine Learning for Health python package', - url='https://github.com/broadinstitute/ml', + url='https://github.com/broadinstitute/ml4h', python_requires='>=3.6', - install_requires=[], + install_requires=requirements, packages=find_packages(), ) diff --git a/tests/conftest.py b/tests/conftest.py index b553cf848..3ca4d058c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,4 @@ import sys -import mock import pytest from ml4h.arguments import parse_args @@ -7,11 +6,11 @@ from ml4h.test_utils import build_hdf5s -def pytest_configure(): +def pytest_configure(config): pytest.N_TENSORS = 50 + config.addinivalue_line("markers", "slow: mark tests as slow") -#@mock.patch.dict(TMAPS, MOCK_TMAPS) @pytest.fixture(scope='class') def default_arguments(tmpdir_factory): temp_dir = tmpdir_factory.mktemp('data') diff --git a/tests/test_arguments.py b/tests/test_arguments.py index 87de42dbb..dc764ac34 100644 --- a/tests/test_arguments.py +++ b/tests/test_arguments.py @@ -1,11 +1,9 @@ import sys -import mock import pytest -from ml4h.arguments import parse_args, TMAPS +from ml4h.arguments import parse_args from ml4h.test_utils import TMAPS as MOCK_TMAPS -@mock.patch.dict(TMAPS, MOCK_TMAPS) class TestUConnect: def test_no_u(self, tmpdir):