From a2f75bc7133e443db15192fd2fe4f1f2a37976fe Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 22 Oct 2024 18:26:21 +0200 Subject: [PATCH] Update project --- .bumpversion.cfg | 32 - .github/workflows/tests.yml | 80 +-- .readthedocs.yml | 13 +- MANIFEST.in | 15 +- README.md | 638 ++++++++++++++++++++ README.rst | 429 ------------- docs/source/conf.py | 10 +- pyproject.toml | 280 ++++++++- setup.cfg | 196 ------ src/pyobo/__init__.py | 2 - src/pyobo/__main__.py | 2 - src/pyobo/api/__init__.py | 2 - src/pyobo/api/alts.py | 13 +- src/pyobo/api/hierarchy.py | 29 +- src/pyobo/api/metadata.py | 7 +- src/pyobo/api/names.py | 63 +- src/pyobo/api/properties.py | 13 +- src/pyobo/api/relations.py | 23 +- src/pyobo/api/species.py | 11 +- src/pyobo/api/typedefs.py | 4 +- src/pyobo/api/utils.py | 11 +- src/pyobo/api/xrefs.py | 9 +- src/pyobo/aws.py | 8 +- src/pyobo/cli/__init__.py | 2 - src/pyobo/cli/aws.py | 2 - src/pyobo/cli/cli.py | 2 - src/pyobo/cli/database.py | 4 +- src/pyobo/cli/lookup.py | 2 - src/pyobo/cli/utils.py | 2 - src/pyobo/constants.py | 2 - src/pyobo/getters.py | 45 +- src/pyobo/gilda_utils.py | 19 +- src/pyobo/identifier_utils.py | 16 +- src/pyobo/mocks.py | 11 +- src/pyobo/normalizer.py | 46 +- src/pyobo/obographs.py | 6 +- src/pyobo/plugins.py | 7 +- src/pyobo/py.typed | 0 src/pyobo/reader.py | 40 +- src/pyobo/registries/__init__.py | 2 - src/pyobo/registries/metaregistry.py | 14 +- src/pyobo/resource_utils.py | 4 +- src/pyobo/resources/__init__.py | 2 - src/pyobo/resources/ncbitaxon.py | 5 +- src/pyobo/resources/ro.py | 6 +- src/pyobo/sources/__init__.py | 2 - src/pyobo/sources/agrovoc.py | 2 - src/pyobo/sources/antibodyregistry.py | 5 +- src/pyobo/sources/biogrid.py | 8 +- src/pyobo/sources/ccle.py | 7 +- src/pyobo/sources/cgnc.py | 4 +- src/pyobo/sources/chebi.py | 6 +- src/pyobo/sources/chembl.py | 4 +- src/pyobo/sources/civic_gene.py | 5 +- src/pyobo/sources/complexportal.py | 8 +- src/pyobo/sources/conso.py | 6 +- src/pyobo/sources/cpt.py | 4 +- src/pyobo/sources/credit.py | 2 +- src/pyobo/sources/cvx.py | 4 +- src/pyobo/sources/depmap.py | 7 +- src/pyobo/sources/dictybase_gene.py | 4 +- src/pyobo/sources/drugbank.py | 13 +- src/pyobo/sources/drugbank_salt.py | 7 +- src/pyobo/sources/drugcentral.py | 12 +- src/pyobo/sources/expasy.py | 23 +- src/pyobo/sources/famplex.py | 8 +- src/pyobo/sources/flybase.py | 6 +- src/pyobo/sources/geonames.py | 2 +- src/pyobo/sources/gmt_utils.py | 11 +- src/pyobo/sources/go.py | 10 +- src/pyobo/sources/gwascentral_phenotype.py | 4 +- src/pyobo/sources/gwascentral_study.py | 5 +- src/pyobo/sources/hgnc.py | 13 +- src/pyobo/sources/hgncgenefamily.py | 6 +- src/pyobo/sources/icd10.py | 7 +- src/pyobo/sources/icd11.py | 7 +- src/pyobo/sources/icd_utils.py | 13 +- src/pyobo/sources/interpro.py | 8 +- src/pyobo/sources/itis.py | 4 +- src/pyobo/sources/kegg/__init__.py | 2 - src/pyobo/sources/kegg/api.py | 7 +- src/pyobo/sources/kegg/genes.py | 7 +- src/pyobo/sources/kegg/genome.py | 4 +- src/pyobo/sources/kegg/pathway.py | 11 +- src/pyobo/sources/mesh.py | 40 +- src/pyobo/sources/mgi.py | 4 +- src/pyobo/sources/mirbase.py | 10 +- src/pyobo/sources/mirbase_constants.py | 2 - src/pyobo/sources/mirbase_family.py | 4 +- src/pyobo/sources/mirbase_mature.py | 4 +- src/pyobo/sources/msigdb.py | 9 +- src/pyobo/sources/ncbigene.py | 8 +- src/pyobo/sources/npass.py | 4 +- src/pyobo/sources/omim_ps.py | 4 +- src/pyobo/sources/pathbank.py | 8 +- src/pyobo/sources/pfam.py | 4 +- src/pyobo/sources/pfam_clan.py | 4 +- src/pyobo/sources/pid.py | 8 +- 
 src/pyobo/sources/pombase.py | 4 +-
 src/pyobo/sources/pubchem.py | 5 +-
 src/pyobo/sources/reactome.py | 6 +-
 src/pyobo/sources/rgd.py | 5 +-
 src/pyobo/sources/rhea.py | 15 +-
 src/pyobo/sources/ror.py | 5 +-
 src/pyobo/sources/selventa/__init__.py | 2 -
 src/pyobo/sources/selventa/schem.py | 4 +-
 src/pyobo/sources/selventa/scomp.py | 4 +-
 src/pyobo/sources/selventa/sdis.py | 4 +-
 src/pyobo/sources/selventa/sfam.py | 4 +-
 src/pyobo/sources/sgd.py | 4 +-
 src/pyobo/sources/slm.py | 4 +-
 src/pyobo/sources/umls/__init__.py | 2 -
 src/pyobo/sources/umls/__main__.py | 2 -
 src/pyobo/sources/umls/get_synonym_types.py | 2 +-
 src/pyobo/sources/umls/umls.py | 6 +-
 src/pyobo/sources/uniprot/__init__.py | 2 -
 src/pyobo/sources/uniprot/uniprot.py | 7 +-
 src/pyobo/sources/uniprot/uniprot_ptm.py | 11 +-
 src/pyobo/sources/utils.py | 8 +-
 src/pyobo/sources/wikipathways.py | 4 +-
 src/pyobo/sources/zfin.py | 5 +-
 src/pyobo/ssg/__init__.py | 5 +-
 src/pyobo/struct/__init__.py | 2 -
 src/pyobo/struct/reference.py | 22 +-
 src/pyobo/struct/struct.py | 193 +++---
 src/pyobo/struct/typedef.py | 19 +-
 src/pyobo/struct/utils.py | 2 -
 src/pyobo/utils/__init__.py | 2 -
 src/pyobo/utils/cache.py | 19 +-
 src/pyobo/utils/io.py | 19 +-
 src/pyobo/utils/iter.py | 11 +-
 src/pyobo/utils/misc.py | 4 +-
 src/pyobo/utils/ndex_utils.py | 13 +-
 src/pyobo/utils/path.py | 9 +-
 src/pyobo/version.py | 6 +-
 src/pyobo/xrefdb/__init__.py | 2 -
 src/pyobo/xrefdb/canonicalizer.py | 45 +-
 src/pyobo/xrefdb/priority.py | 2 -
 src/pyobo/xrefdb/sources/__init__.py | 7 +-
 src/pyobo/xrefdb/sources/biomappings.py | 2 -
 src/pyobo/xrefdb/sources/cbms2019.py | 2 -
 src/pyobo/xrefdb/sources/chembl.py | 2 -
 src/pyobo/xrefdb/sources/compath.py | 4 +-
 src/pyobo/xrefdb/sources/famplex.py | 8 +-
 src/pyobo/xrefdb/sources/gilda.py | 2 -
 src/pyobo/xrefdb/sources/intact.py | 10 +-
 src/pyobo/xrefdb/sources/ncit.py | 4 +-
 src/pyobo/xrefdb/sources/pubchem.py | 7 +-
 src/pyobo/xrefdb/sources/wikidata.py | 6 +-
 src/pyobo/xrefdb/xrefs_pipeline.py | 31 +-
 tests/__init__.py | 2 -
 tests/constants.py | 2 -
 tests/test_alt_ids.py | 2 -
 tests/test_caches.py | 10 +-
 tests/test_extract.py | 2 -
 tests/test_get.py | 4 +-
 tests/test_gmt.py | 2 -
 tests/test_ground.py | 2 -
 tests/test_mapper.py | 2 -
 tests/test_sources/__init__.py | 2 -
 tests/test_sources/test_famplex.py | 2 -
 tests/test_struct.py | 2 -
 tests/test_utils.py | 2 -
 tests/test_version_pins.py | 3 +-
 tox.ini | 276 ++++++---
 165 files changed, 1775 insertions(+), 1625 deletions(-)
 delete mode 100644 .bumpversion.cfg
 create mode 100644 README.md
 delete mode 100644 README.rst
 delete mode 100644 setup.cfg
 create mode 100644 src/pyobo/py.typed

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
deleted file mode 100644
index 7c20868f..00000000
--- a/.bumpversion.cfg
+++ /dev/null
@@ -1,32 +0,0 @@
-[bumpversion]
-current_version = 0.10.13-dev
-commit = True
-tag = False
-parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
-serialize = - {major}.{minor}.{patch}-{release}+{build} - {major}.{minor}.{patch}+{build} - {major}.{minor}.{patch}-{release} - {major}.{minor}.{patch} - -[bumpversion:part:release] -optional_value = production -first_value = dev -values = - dev - production - -[bumpverion:part:build] -values = [0-9A-Za-z-]+ - -[bumpversion:file:setup.cfg] -search = version = {current_version} -replace = version = {new_version} - -[bumpversion:file:docs/source/conf.py] -search = release = "{current_version}" -replace = release = "{new_version}" - -[bumpversion:file:src/pyobo/version.py] -search = VERSION = "{current_version}" -replace = VERSION = "{new_version}" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1c1e63dc..ebf81a3b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,3 +1,7 @@ +# This file configures the continuous integration (CI) system on GitHub. +# Introductory materials can be found here: https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions. +# Documentation for editing this file can be found here: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions + name: Tests on: @@ -8,71 +12,67 @@ on: jobs: lint: - name: Lint + name: Code Quality runs-on: ubuntu-latest strategy: matrix: - python-version: [ "3.9", "3.12" ] + python-version: [ "3.12", "3.9" ] + tox-command: ["manifest", "lint", "pyroma", "mypy"] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - name: "Install uv" + uses: "astral-sh/setup-uv@v3" with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies + enable-cache: true + cache-dependency-glob: "pyproject.toml" + - name: "Run command" run: | - sudo apt-get install graphviz - pip install tox - - name: Check manifest - run: tox -e manifest - - name: Check code quality with flake8 - run: tox -e flake8 - - name: Check package metadata with Pyroma - run: tox -e pyroma - - name: Check static typing with MyPy - run: tox -e mypy + uvx -p ${{ matrix.python-version }} --with tox-uv tox -e ${{ matrix.tox-command }} + docs: name: Documentation runs-on: ubuntu-latest strategy: matrix: + # We only test documentation on the latest version + # sphinx 8.0 / sphinx-rtd-theme 3.0 discontinued Python 3.9 support + # a year early, which prompted re-thinking about this. 
python-version: [ "3.12" ] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - name: "Install uv" + uses: "astral-sh/setup-uv@v3" with: - python-version: ${{ matrix.python-version }} + enable-cache: true + cache-dependency-glob: "pyproject.toml" - name: Install dependencies run: | sudo apt-get install graphviz - pip install tox tox-uv - name: Check RST conformity with doc8 - run: tox -e doc8 - - name: Check README.rst - run: tox -e doc8 + run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e doc8 + - name: Check docstring coverage + run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e docstr-coverage - name: Check documentation build with Sphinx - run: tox -e docs-test + run: uvx -p ${{ matrix.python-version }} --with tox-uv tox -e docs-test tests: name: Tests runs-on: ${{ matrix.os }} strategy: matrix: os: [ ubuntu-latest ] - python-version: [ "3.9", "3.12" ] - exclude: - - os: windows-latest - python-version: 3.9 + python-version: [ "3.12", "3.9" ] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - name: "Install uv" + uses: "astral-sh/setup-uv@v3" with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - sudo apt-get install graphviz - pip install tox tox-uv - - name: Test with pytest + enable-cache: true + cache-dependency-glob: "pyproject.toml" + - name: Test with pytest and generate coverage file run: - tox -e py + uvx -p ${{ matrix.python-version }} --with tox-uv tox -e py + - name: Upload coverage report to codecov + uses: codecov/codecov-action@v4 + if: success() + with: + file: coverage.xml diff --git a/.readthedocs.yml b/.readthedocs.yml index f1bc1bed..5212949a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,15 +1,20 @@ -# See: https://docs.readthedocs.io/en/latest/config-file/v2.html +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 +# Set the version of Python and other tools you might need build: - image: latest + os: ubuntu-22.04 + apt_packages: + - graphviz + tools: + python: "3.12" python: - version: "3.8" install: - method: pip path: . extra_requirements: - docs - - rdflib diff --git a/MANIFEST.in b/MANIFEST.in index c5f3d9b8..2617d6f4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,14 +1,11 @@ graft src graft tests -prune benchmarking +prune scripts +prune notebooks prune tests/.pytest_cache -prune docs/source/api +prune docs -recursive-include docs/source *.py -recursive-include docs/source *.rst -recursive-include docs/source *.png +global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle .idea/** -global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle - -exclude .bumpversion.cfg Dockerfile docker-compose.yml -include *.rst *.yml LICENSE tox.ini +include README.md LICENSE +exclude tox.ini .readthedocs.yml diff --git a/README.md b/README.md new file mode 100644 index 00000000..8ed9f279 --- /dev/null +++ b/README.md @@ -0,0 +1,638 @@ + + +

+<h1 align="center">
+  PyOBO
+</h1>
+
+<p align="center">
+  <!-- badges: Tests, PyPI, PyPI - Python Version, PyPI - License,
+       Documentation Status, Codecov status, Cookiecutter template from @cthoyt,
+       Code style: black, Contributor Covenant -->
+</p>
+
+Tools for biological identifiers, names, synonyms, xrefs, hierarchies, relations, and properties through the
+perspective of OBO.
+
+## Example Usage
+
+Note! PyOBO is no-nonsense. This means that there are no repetitive
+prefixes in identifiers. It also means all identifiers are strings,
+no exceptions.
+
+Note! The first time you run these, they have to download and cache
+all resources. We're not in the business of redistributing data,
+so all scripts should be completely reproducible. There are some
+AWS tools for hosting/downloading pre-compiled versions in
+`pyobo.aws` if you don't have time for that.
+
+Note! PyOBO can perform grounding in a limited number of cases, but
+it is *not* a general solution for named entity recognition (NER) or grounding.
+It's suggested to check [Gilda](https://github.com/indralab/gilda)
+for a no-nonsense solution.
+
+### Mapping Identifiers and CURIEs
+
+Get a mapping of ChEBI identifiers to names:
+
+```python
+import pyobo
+
+chebi_id_to_name = pyobo.get_id_name_mapping('chebi')
+
+name = chebi_id_to_name['132964']
+assert name == 'fluazifop-P-butyl'
+```
+
+Or, if you don't have time for two lines:
+
+```python
+import pyobo
+
+name = pyobo.get_name('chebi', '132964')
+assert name == 'fluazifop-P-butyl'
+```
+
+Get the reverse mapping of ChEBI names to identifiers:
+
+```python
+import pyobo
+
+chebi_name_to_id = pyobo.get_name_id_mapping('chebi')
+
+identifier = chebi_name_to_id['fluazifop-P-butyl']
+assert identifier == '132964'
+```
+
+Maybe you live in CURIE world and just want to normalize something like
+`CHEBI:132964`:
+
+```python
+import pyobo
+
+name = pyobo.get_name_by_curie('CHEBI:132964')
+assert name == 'fluazifop-P-butyl'
+```
+
+Sometimes you accidentally get an old CURIE. It can be mapped to the more recent
+one using alternative identifiers listed in the underlying OBO with:
+
+```python
+import pyobo
+
+# Look up DNA-binding transcription factor activity (go:0003700)
+# based on an old id
+primary_curie = pyobo.get_primary_curie('go:0001071')
+assert primary_curie == 'go:0003700'
+
+# If it's already the primary, it just gets returned
+assert 'go:0003700' == pyobo.get_primary_curie('go:0003700')
+```
+
+### Mapping Species
+
+Some resources have species information for their terms. Get a mapping of WikiPathways identifiers
+to species (as NCBI taxonomy identifiers):
+
+```python
+import pyobo
+
+wikipathways_id_to_species = pyobo.get_id_species_mapping('wikipathways')
+
+# Apoptosis (Homo sapiens)
+taxonomy_id = wikipathways_id_to_species['WP254']
+assert taxonomy_id == '9606'
+```
+
+Or, if you don't have time for two lines:
+
+```python
+import pyobo
+
+# Apoptosis (Homo sapiens)
+taxonomy_id = pyobo.get_species('wikipathways', 'WP254')
+assert taxonomy_id == '9606'
+```
+
+### Grounding
+
+Maybe you've got names/synonyms you want to try to map back to ChEBI synonyms.
+Given the brand name `Fusilade II` of `CHEBI:132964`, it should be possible to look
+up the identifier and its preferred label.
+
+```python
+import pyobo
+
+prefix, identifier, name = pyobo.ground('chebi', 'Fusilade II')
+assert prefix == 'chebi'
+assert identifier == '132964'
+assert name == 'fluazifop-P-butyl'
+
+# When failure happens...
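+# ground() still returns the same three-element tuple, with every
+# element set to None, so unpacking stays safe: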
+prefix, identifier, name = pyobo.ground('chebi', 'Definitely not a real name')
+assert prefix is None
+assert identifier is None
+assert name is None
+```
+
+If you're not really sure which namespace a name might belong to, you
+can try a few in a row (prioritize by ones that cover the appropriate
+entity type to avoid false positives in case of conflicts):
+
+```python
+import pyobo
+
+# looking for phenotypes/pathways
+prefix, identifier, name = pyobo.ground(['efo', 'go'], 'ERAD')
+assert prefix == 'go'
+assert identifier == '0030433'
+assert name == 'ubiquitin-dependent ERAD pathway'
+```
+
+### Cross-referencing
+
+Get xrefs from ChEBI to PubChem:
+
+```python
+import pyobo
+
+chebi_id_to_pubchem_compound_id = pyobo.get_filtered_xrefs('chebi', 'pubchem.compound')
+
+pubchem_compound_id = chebi_id_to_pubchem_compound_id['132964']
+assert pubchem_compound_id == '3033674'
+```
+
+If you don't have time for two lines:
+
+```python
+import pyobo
+
+pubchem_compound_id = pyobo.get_xref('chebi', '132964', 'pubchem.compound')
+assert pubchem_compound_id == '3033674'
+```
+
+Get xrefs from Entrez to HGNC. They're only available through HGNC,
+so you need to flip them:
+
+```python
+import pyobo
+
+hgnc_id_to_ncbigene_id = pyobo.get_filtered_xrefs('hgnc', 'ncbigene')
+ncbigene_id_to_hgnc_id = {
+    ncbigene_id: hgnc_id
+    for hgnc_id, ncbigene_id in hgnc_id_to_ncbigene_id.items()
+}
+mapt_hgnc = ncbigene_id_to_hgnc_id['4137']
+assert mapt_hgnc == '6893'
+```
+
+Since this is a common pattern, there's a keyword argument `flip`
+that does this for you:
+
+```python
+import pyobo
+
+ncbigene_id_to_hgnc_id = pyobo.get_filtered_xrefs('hgnc', 'ncbigene', flip=True)
+mapt_hgnc_id = ncbigene_id_to_hgnc_id['4137']
+assert mapt_hgnc_id == '6893'
+```
+
+If you don't have time for two lines (I admit this one is a bit confusing) and
+need to flip it:
+
+```python
+import pyobo
+
+hgnc_id = pyobo.get_xref('hgnc', '4137', 'ncbigene', flip=True)
+assert hgnc_id == '6893'
+```
+
+Remap a CURIE based on a pre-defined priority list and [Inspector Javert's Xref
+Database](https://cthoyt.com/2020/04/19/inspector-javerts-xref-database.html):
+
+```python
+import pyobo
+
+# Map to the best source possible
+mapt_ncbigene = pyobo.get_priority_curie('hgnc:6893')
+assert mapt_ncbigene == 'ncbigene:4137'
+
+# Sometimes you know you're the best. Own it.
+assert 'ncbigene:4137' == pyobo.get_priority_curie('ncbigene:4137')
+```
+
+Find all CURIEs mapped to a given one using Inspector Javert's Xref Database:
+
+```python
+import pyobo
+
+# Get a set of all CURIEs mapped to MAPT
+mapt_curies = pyobo.get_equivalent('hgnc:6893')
+assert 'ncbigene:4137' in mapt_curies
+assert 'ensembl:ENSG00000186868' in mapt_curies
+```
+
+If you don't want to wait to build the database locally for `pyobo.get_priority_curie` and
+`pyobo.get_equivalent`, you can use the following code to download a release from
+[Zenodo](https://zenodo.org/record/3757266):
+
+```python
+import pyobo.resource_utils
+
+pyobo.resource_utils.ensure_inspector_javert()
+```
+
+### Properties
+
+Get properties, like SMILES. The semantics of these are defined on a per-ontology basis.
+
+```python
+import pyobo
+
+# I don't make the rules. I wouldn't have chosen this as the key for this property. It could be any string.
+chebi_smiles_property = 'http://purl.obolibrary.org/obo/chebi/smiles'
+chebi_id_to_smiles = pyobo.get_filtered_properties_mapping('chebi', chebi_smiles_property)
+
+smiles = chebi_id_to_smiles['132964']
+assert smiles == 'C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F'
+```
+
+If you don't have time for two lines:
+
+```python
+import pyobo
+
+smiles = pyobo.get_property('chebi', '132964', 'http://purl.obolibrary.org/obo/chebi/smiles')
+assert smiles == 'C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F'
+```
+
+### Hierarchy
+
+Check if an entity is in the hierarchy:
+
+```python
+import pyobo
+
+# check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process
+assert 'go:0008219' in pyobo.get_ancestors('go', '0006915')
+
+# check that go:0070246 ! natural killer cell apoptotic process is a
+# descendant of go:0006915 ! apoptotic process
+apoptotic_process_descendants = pyobo.get_descendants('go', '0006915')
+assert 'go:0070246' in apoptotic_process_descendants
+```
+
+Get the sub-hierarchy below a given node:
+
+```python
+import pyobo
+
+# get the descendant graph of go:0006915 ! apoptotic process
+apoptotic_process_subhierarchy = pyobo.get_subhierarchy('go', '0006915')
+
+# check that go:0070246 ! natural killer cell apoptotic process is a
+# descendant of go:0006915 ! apoptotic process through the sub-hierarchy
+assert 'go:0070246' in apoptotic_process_subhierarchy
+```
+
+Get a hierarchy with properties preloaded in the node data dictionaries:
+
+```python
+import pyobo
+
+prop = 'http://purl.obolibrary.org/obo/chebi/smiles'
+chebi_hierarchy = pyobo.get_hierarchy('chebi', properties=[prop])
+
+assert 'chebi:132964' in chebi_hierarchy
+assert prop in chebi_hierarchy.nodes['chebi:132964']
+assert chebi_hierarchy.nodes['chebi:132964'][prop] == 'C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F'
+```
+
+### Relations
+
+Get all orthologies (`ro:HOM0000017`) between HGNC and MGI (note: this mapping is one-way):
+
+```python
+>>> import pyobo
+>>> human_mapt_hgnc_id = '6893'
+>>> mouse_mapt_mgi_id = '97180'
+>>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi')
+>>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
+```
+
+If you want to do it in one line, use:
+
+```python
+>>> import pyobo
+>>> human_mapt_hgnc_id = '6893'
+>>> mouse_mapt_mgi_id = '97180'
+>>> assert mouse_mapt_mgi_id == pyobo.get_relation('hgnc', 'ro:HOM0000017', 'mgi', human_mapt_hgnc_id)
+```
+
+### Writing Tests that Use PyOBO
+
+If you're writing your own code that relies on PyOBO, and unit
+testing it (as you should) in a continuous integration setting,
+you've probably realized that loading all of the resources on each
+build is not so fast. In those scenarios, you can use some of the
+pre-built patches like in the following:
+
+```python
+import unittest
+
+import pyobo
+from pyobo.mocks import get_mock_id_name_mapping
+
+mock_id_name_mapping = get_mock_id_name_mapping({
+    'chebi': {
+        '132964': 'fluazifop-P-butyl',
+    },
+})
+
+class MyTestCase(unittest.TestCase):
+    def test_lookup(self):
+        with mock_id_name_mapping:
+            # use functions directly, or use your functions that wrap them
+            assert pyobo.get_name('chebi', '132964') == 'fluazifop-P-butyl'
+```
+
+## Curation of the Bioregistry
+
+In order to normalize references and identify resources, PyOBO uses the
+[Bioregistry](https://github.com/bioregistry/bioregistry), which used to be a part
+of PyOBO but has since been externalized for more general reuse.
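+As a quick aside, the Bioregistry package itself (already a PyOBO dependency) can be
+used directly to standardize prefixes and CURIEs. A minimal sketch, separate from
+PyOBO's own API:
+
+```python
+import bioregistry
+
+# prefixes are normalized to the Bioregistry's canonical form
+assert bioregistry.normalize_prefix('CHEBI') == 'chebi'
+
+# the same normalization applies to whole CURIEs
+assert bioregistry.normalize_curie('CHEBI:132964') == 'chebi:132964'
+```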
+At [src/pyobo/registries/metaregistry.json](https://github.com/biopragmatics/pyobo/blob/master/src/pyobo/registries/metaregistry.json)
+is the curated "metaregistry". This is a source of information that contains
+all sorts of fixes for missing/wrong information in MIRIAM, OLS, and OBO Foundry; entries that don't appear in
+any of them; additional synonym information for each namespace/prefix; rules for normalizing xrefs and CURIEs; etc.
+
+Other entries in the metaregistry (sketched after this list):
+
+- The `"remappings"->"full"` entry is a dictionary from strings that might follow `xref:`
+  in a given OBO file that need to be completely replaced, due to incorrect formatting
+- The `"remappings"->"prefix"` entry contains a dictionary of prefixes for xrefs that need
+  to be remapped. Several rules, for example, remove superfluous spaces that occur inside
+  CURIEs, and others address instances of the GOGO issue.
+- The `"blacklists"` entry contains rules for throwing out malformed xrefs based on
+  the full string, just the prefix, or just the suffix.
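+To make the structure concrete, here is a small sketch of the metaregistry's shape.
+The top-level keys follow the description above; the values are invented for
+illustration only:
+
+```json
+{
+  "remappings": {
+    "full": {
+      "some broken xref string": "correct:curie"
+    },
+    "prefix": {
+      "MGI:MGI:": "MGI:"
+    }
+  },
+  "blacklists": {
+    "full": ["an exact xref string to discard"],
+    "prefix": ["a prefix to discard:"],
+    "suffix": ["(a suffix to discard)"]
+  }
+}
+```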
+## Troubleshooting
+
+The OBO Foundry seems to be pretty unstable with respect to the URLs to OBO resources. If you get an error like:
+
+```
+pyobo.getters.MissingOboBuild: OBO Foundry is missing a build for: mondo
+```
+
+Then you should check the corresponding page on the OBO Foundry (in this case,
+http://www.obofoundry.org/ontology/mondo.html) and update the `url` entry for that
+namespace in the Bioregistry.
+
+## 🚀 Installation
+
+The most recent release can be installed from
+[PyPI](https://pypi.org/project/pyobo/) with:
+
+```shell
+pip install pyobo
+```
+
+The most recent code and data can be installed directly from GitHub with:
+
+```shell
+pip install git+https://github.com/biopragmatics/pyobo.git
+```
+
+## 👐 Contributing
+
+Contributions, whether filing an issue, making a pull request, or forking, are appreciated. See
+[CONTRIBUTING.md](https://github.com/biopragmatics/pyobo/blob/master/.github/CONTRIBUTING.md)
+for more information on getting involved.
+
+## 👋 Attribution
+
+### ⚖️ License
+
+The code in this package is licensed under the MIT License.
+
+### 🍪 Cookiecutter
+
+This package was created with [@audreyfeldroy](https://github.com/audreyfeldroy)'s
+[cookiecutter](https://github.com/cookiecutter/cookiecutter) package using [@cthoyt](https://github.com/cthoyt)'s
+[cookiecutter-snekpack](https://github.com/cthoyt/cookiecutter-snekpack) template.
+
+## 🛠️ For Developers
+
+<details>
+  <summary>See developer instructions</summary>
+
+The final section of the README is for you if you want to get involved by making a code contribution.
+
+### Development Installation
+
+To install in development mode, use the following:
+
+```bash
+git clone https://github.com/biopragmatics/pyobo.git
+cd pyobo
+pip install -e .
+```
+
+### Updating Package Boilerplate
+
+This project uses `cruft` to keep boilerplate (i.e., configuration, contribution guidelines,
+documentation configuration) up-to-date with the upstream cookiecutter package. Update with
+the following:
+
+```shell
+pip install cruft
+cruft update
+```
+
+More info on Cruft's update command is
+available [here](https://github.com/cruft/cruft?tab=readme-ov-file#updating-a-project).
+
+### 🥼 Testing
+
+After cloning the repository and installing `tox` with `pip install tox tox-uv`,
+the unit tests in the `tests/` folder can be run reproducibly with:
+
+```shell
+tox -e py
+```
+
+Additionally, these tests are automatically re-run with each commit in a
+[GitHub Action](https://github.com/biopragmatics/pyobo/actions?query=workflow%3ATests).
+
+### 📖 Building the Documentation
+
+The documentation can be built locally using the following:
+
+```shell
+git clone https://github.com/biopragmatics/pyobo.git
+cd pyobo
+tox -e docs
+open docs/build/html/index.html
+```
+
+The documentation automatically installs the package as well as the `docs`
+extra specified in the [`pyproject.toml`](pyproject.toml). `sphinx` plugins
+like `texext` can be added there. Additionally, they need to be added to the
+`extensions` list in [`docs/source/conf.py`](docs/source/conf.py).
+
+The documentation can be deployed to [ReadTheDocs](https://readthedocs.io) using
+[this guide](https://docs.readthedocs.io/en/stable/intro/import-guide.html).
+The [`.readthedocs.yml`](.readthedocs.yml) YAML file contains all the configuration you'll need.
+You can also set up continuous integration on GitHub to check not only that
+Sphinx can build the documentation in an isolated environment (i.e., with `tox -e docs-test`)
+but also that [ReadTheDocs can build it too](https://docs.readthedocs.io/en/stable/pull-requests.html).
+
+#### Configuring ReadTheDocs
+
+1. Log in to ReadTheDocs with your GitHub account to install the integration
+   at https://readthedocs.org/accounts/login/?next=/dashboard/
+2. Import your project by navigating to https://readthedocs.org/dashboard/import then clicking the plus icon next to
+   your repository
+3. You can rename the repository on the next screen using a more stylized name (i.e., with spaces and capital letters)
+4. Click next, and you're good to go!
+
+### 📦 Making a Release
+
+#### Configuring Zenodo
+
+[Zenodo](https://zenodo.org) is a long-term archival system that assigns a DOI to each release of your package.
+
+1. Log in to Zenodo via GitHub with this link: https://zenodo.org/oauth/login/github/?next=%2F. This brings you to a
+   page that lists all of your organizations and asks you to approve installing the Zenodo app on GitHub. Click "grant"
+   next to any organizations you want to enable the integration for, then click the big green "approve" button. This
+   step only needs to be done once.
+2. Navigate to https://zenodo.org/account/settings/github/, which lists all of your GitHub repositories (both in your
+   username and any organizations you enabled). Click the on/off toggle for any relevant repositories. When you make
+   a new repository, you'll have to come back to this page to enable it.
+
+After these steps, you're ready to go! After you make a release on GitHub (steps for this are below), you can navigate
+to https://zenodo.org/account/settings/github/repository/biopragmatics/pyobo
+to see the DOI for the release and link to the Zenodo record for it.
+
+#### Registering with the Python Package Index (PyPI)
+
+You only have to do the following steps once.
+
+1. Register for an account on the [Python Package Index (PyPI)](https://pypi.org/account/register)
+2. Navigate to https://pypi.org/manage/account and make sure you have verified your email address. A verification email
+   might not have been sent by default, so you might have to click the "options" dropdown next to your address to get to
+   the "re-send verification email" button
+3. 2-Factor authentication is required for PyPI since the end of 2023 (see
+   this [blog post from PyPI](https://blog.pypi.org/posts/2023-05-25-securing-pypi-with-2fa/)). This means
+   you have to first issue account recovery codes, then set up 2-factor authentication
+4. Issue an API token from https://pypi.org/manage/account/token
+
+#### Configuring your machine's connection to PyPI
+
+You have to do the following steps once per machine. Create a file in your home directory called
+`.pypirc` and include the following:
+
+```ini
+[distutils]
+index-servers =
+    pypi
+    testpypi
+
+[pypi]
+username = __token__
+password = <the API token you just made>
+
+# This block is optional in case you want to be able to make test releases to the Test PyPI server
+[testpypi]
+repository = https://test.pypi.org/legacy/
+username = __token__
+password = <an API token from Test PyPI>
+```
+
+Note that since PyPI requires token-based authentication, we use `__token__` as the user, verbatim.
+If you already have a `.pypirc` file with a `[distutils]` section, just make sure that there is an `index-servers`
+key and that `pypi` is in its associated list. More information on configuring the `.pypirc` file can
+be found [here](https://packaging.python.org/en/latest/specifications/pypirc).
+
+#### Uploading to PyPI
+
+After installing the package in development mode and installing
+`tox` with `pip install tox tox-uv`,
+run the following from the shell:
+
+```shell
+tox -e finish
+```
+
+This script does the following:
+
+1. Uses [bump-my-version](https://github.com/callowayproject/bump-my-version) to switch the version number in
+   the `pyproject.toml`, `CITATION.cff`, `src/pyobo/version.py`,
+   and [`docs/source/conf.py`](docs/source/conf.py) to not have the `-dev` suffix
+2. Packages the code in both a tar archive and a wheel using
+   [`uv build`](https://docs.astral.sh/uv/guides/publish/#building-your-package)
+3. Uploads to PyPI using [`twine`](https://github.com/pypa/twine)
+4. Pushes to GitHub. You'll need to make a release going along with the commit where the version was bumped
+5. Bumps the version to the next patch. If you made big changes and want to bump the version by minor, you can
+   use `tox -e bumpversion -- minor` after
+
+#### Releasing on GitHub
+
+1. Navigate
+   to https://github.com/biopragmatics/pyobo/releases/new
+   to draft a new release
+2. Click the "Choose a Tag" dropdown and select the tag corresponding to the release you just made
+3. Click the "Generate Release Notes" button to get a quick outline of recent changes. Modify the title and description
+   as you see fit
+4. Click the big green "Publish Release" button
+
+This will trigger Zenodo to assign a DOI to your release as well.
+
+</details>
diff --git a/README.rst b/README.rst deleted file mode 100644 index 4c9c18ad..00000000 --- a/README.rst +++ /dev/null @@ -1,429 +0,0 @@ -PyOBO -===== -|build| |pypi_version| |python_versions| |pypi_license| |zenodo| |black| |bioregistry| - -Tools for biological identifiers, names, synonyms, xrefs, hierarchies, relations, and properties through the -perspective of OBO. - -Example Usage -------------- -Note! PyOBO is no-nonsense. This means that there's no repetitive -prefixes in identifiers. It also means all identifiers are strings, -no exceptions. - -Note! The first time you run these, they have to download and cache -all resources. We're not in the business of redistributing data, -so all scripts should be completely reproducible. There's some -AWS tools for hosting/downloading pre-compiled versions in -``pyobo.aws`` if you don't have time for that. - -Note! PyOBO can perform grounding in a limited number of cases, but -it is *not* a general solution for named entity recognition (NER) or grounding. -It's suggested to check `Gilda `_ -for a no-nonsense solution. - -Mapping Identifiers and CURIEs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Get mapping of ChEBI identifiers to names: - -.. code-block:: python - - import pyobo - - chebi_id_to_name = pyobo.get_id_name_mapping('chebi') - - name = chebi_id_to_name['132964'] - assert name == 'fluazifop-P-butyl' - -Or, you don't have time for two lines: - -.. code-block:: python - - import pyobo - - name = pyobo.get_name('chebi', '132964') - assert name == 'fluazifop-P-butyl' - -Get reverse mapping of ChEBI names to identifiers: - -.. code-block:: python - - import pyobo - - chebi_name_to_id = pyobo.get_name_id_mapping('chebi') - - identifier = chebi_name_to_id['fluazifop-P-butyl'] - assert identifier == '132964' - -Maybe you live in CURIE world and just want to normalize something like -`CHEBI:132964`: - -.. code-block:: python - - import pyobo - - name = pyobo.get_name_by_curie('CHEBI:132964') - assert name == 'fluazifop-P-butyl' - -Sometimes you accidentally got an old CURIE. It can be mapped to the more recent -one using alternative identifiers listed in the underlying OBO with: - -.. code-block:: python - - import pyobo - - # Look up DNA-binding transcription factor activity (go:0003700) - # based on an old id - primary_curie = pyobo.get_primary_curie('go:0001071') - assert primary_curie == 'go:0003700' - - # If it's already the primary, it just gets returned - assert 'go:0003700' == pyobo.get_priority_curie('go:0003700') - -Mapping Species -~~~~~~~~~~~~~~~ -Some resources have species information for their term. Get a mapping of WikiPathway identifiers -to species (as NCBI taxonomy identifiers): - -.. code-block:: python - - import pyobo - - wikipathways_id_to_species = pyobo.get_id_species_mapping('wikipathways') - - # Apoptosis (Homo sapiens) - taxonomy_id = wikipathways_id_to_species['WP254'] - assert taxonomy_id == '9606' - -Or, you don't have time for two lines: - -.. code-block:: python - - import pyobo - - # Apoptosis (Homo sapiens) - taxonomy_id = pyobo.get_species('wikipathways', 'WP254') - assert taxonomy_id == '9606' - -Grounding -~~~~~~~~~ -Maybe you've got names/synonyms you want to try and map back to ChEBI synonyms. -Given the brand name `Fusilade II` of `CHEBI:132964`, it should be able to look -it up and its preferred label. - -.. 
code-block:: python - - import pyobo - - prefix, identifier, name = pyobo.ground('chebi', 'Fusilade II') - assert prefix == 'chebi' - assert identifier == '132964' - assert name == 'fluazifop-P-butyl' - - # When failure happens... - prefix, identifier, name = pyobo.ground('chebi', 'Definitely not a real name') - assert prefix is None - assert identifier is None - assert name is None - -If you're not really sure which namespace a name might belong to, you -can try a few in a row (prioritize by ones that cover the appropriate -entity type to avoid false positives in case of conflicts): - -.. code-block:: python - - import pyobo - - # looking for phenotypes/pathways - prefix, identifier, name = pyobo.ground(['efo', 'go'], 'ERAD') - assert prefix == 'go' - assert identifier == '0030433' - assert name == 'ubiquitin-dependent ERAD pathway' - -Cross-referencing -~~~~~~~~~~~~~~~~~ -Get xrefs from ChEBI to PubChem: - -.. code-block:: python - - import pyobo - - chebi_id_to_pubchem_compound_id = pyobo.get_filtered_xrefs('chebi', 'pubchem.compound') - - pubchem_compound_id = chebi_id_to_pubchem_compound_id['132964'] - assert pubchem_compound_id == '3033674' - -If you don't have time for two lines: - -.. code-block:: python - - import pyobo - - pubchem_compound_id = pyobo.get_xref('chebi', '132964', 'pubchem.compound') - assert pubchem_compound_id == '3033674' - -Get xrefs from Entrez to HGNC, but they're only available through HGNC -so you need to flip them: - -.. code-block:: python - - import pyobo - - hgnc_id_to_ncbigene_id = pyobo.get_filtered_xrefs('hgnc', 'ncbigene') - ncbigene_id_to_hgnc_id = { - ncbigene_id: hgnc_id - for hgnc_id, ncbigene_id in hgnc_id_to_ncbigene_id.items() - } - mapt_hgnc = ncbigene_id_to_hgnc_id['4137'] - assert mapt_hgnc == '6893' - -Since this is a common pattern, there's a keyword argument `flip` -that does this for you: - -.. code-block:: python - - import pyobo - - ncbigene_id_to_hgnc_id = pyobo.get_filtered_xrefs('hgnc', 'ncbigene', flip=True) - mapt_hgnc_id = ncbigene_id_to_hgnc_id['4137'] - assert mapt_hgnc_id == '6893' - -If you don't have time for two lines (I admit this one is a bit confusing) and -need to flip it: - -.. code-block:: python - - import pyobo - - hgnc_id = pyobo.get_xref('hgnc', '4137', 'ncbigene', flip=True) - assert hgnc_id == '6893' - -Remap a CURIE based on pre-defined priority list and `Inspector Javert's Xref -Database `_: - -.. code-block:: python - - import pyobo - - # Map to the best source possible - mapt_ncbigene = pyobo.get_priority_curie('hgnc:6893') - assert mapt_ncbigene == 'ncbigene:4137' - - # Sometimes you know you're the best. Own it. - assert 'ncbigene:4137' == pyobo.get_priority_curie('ncbigene:4137') - -Find all CURIEs mapped to a given one using Inspector Javert's Xref Database: - -.. code-block:: python - - import pyobo - - # Get a set of all CURIEs mapped to MAPT - mapt_curies = pyobo.get_equivalent('hgnc:6893') - assert 'ncbigene:4137' in mapt_curies - assert 'ensembl:ENSG00000186868' in mapt_curies - -If you don't want to wait to build the database locally for the ``pyobo.get_priority_curie`` and -``pyobo.get_equivalent``, you can use the following code to download a release from -`Zenodo `_: - -.. code-block:: python - - import pyobo.resource_utils - - pyobo.resource_utils.ensure_inspector_javert() - -Properties -~~~~~~~~~~ -Get properties, like SMILES. The semantics of these are defined on an OBO-OBO basis. - -.. code-block:: python - - import pyobo - - # I don't make the rules. 
I wouldn't have chosen this as the key for this property. It could be any string - chebi_smiles_property = 'http://purl.obolibrary.org/obo/chebi/smiles' - chebi_id_to_smiles = pyobo.get_filtered_properties_mapping('chebi', chebi_smiles_property) - - smiles = chebi_id_to_smiles['132964'] - assert smiles == 'C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F' - -If you don't have time for two lines: - -.. code-block:: python - - import pyobo - - smiles = pyobo.get_property('chebi', '132964', 'http://purl.obolibrary.org/obo/chebi/smiles') - assert smiles == 'C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F' - -Hierarchy -~~~~~~~~~ -Check if an entity is in the hierarchy: - -.. code-block:: python - - import networkx as nx - import pyobo - - # check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process - assert 'go:0008219' in pyobo.get_ancestors('go', '0006915') - - # check that go:0070246 ! natural killer cell apoptotic process is a - # descendant of go:0006915 ! apoptotic process - apopototic_process_descendants = pyobo.get_descendants('go', '0006915') - assert 'go:0070246' in apopototic_process_descendants - -Get the subhierarchy below a given node: - -.. code-block:: python - - # get the descendant graph of go:0006915 ! apoptotic process - apopototic_process_subhierarchy = pyobo.get_subhierarchy('go', '0006915') - - # check that go:0070246 ! natural killer cell apoptotic process is a - # descendant of go:0006915 ! apoptotic process through the subhierarchy - assert 'go:0070246' in apopototic_process_subhierarchy - -Get a hierarchy with properties pre-loaded in the node data dictionaries: - -.. code-block:: python - - import pyobo - - prop = 'http://purl.obolibrary.org/obo/chebi/smiles' - chebi_hierarchy = pyobo.get_hierarchy('chebi', properties=[prop]) - - assert 'chebi:132964' in chebi_hierarchy - assert prop in chebi_hierarchy.nodes['chebi:132964'] - assert chebi_hierarchy.nodes['chebi:132964'][prop] == 'C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F' - -Relations -~~~~~~~~~ -Get all orthologies (``ro:HOM0000017``) between HGNC and MGI (note: this is one way) - -.. code-block:: python - - >>> import pyobo - >>> human_mapt_hgnc_id = '6893' - >>> mouse_mapt_mgi_id = '97180' - >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi') - >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id] - -If you want to do it in one line, use: - -.. code-block:: python - - >>> import pyobo - >>> human_mapt_hgnc_id = '6893' - >>> mouse_mapt_mgi_id = '97180' - >>> assert mouse_mapt_mgi_id == pyobo.get_relation('hgnc', 'ro:HOM0000017', 'mgi', human_mapt_hgnc_id) - -Writings Tests that Use PyOBO -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you're writing your own code that relies on PyOBO, and unit -testing it (as you should) in a continuous integration setting, -you've probably realized that loading all of the resources on each -build is not so fast. In those scenarios, you can use some of the -pre-build patches like in the following: - -.. 
code-block:: python - - import unittest - import pyobo - from pyobo.mocks import get_mock_id_name_mapping - - mock_id_name_mapping = get_mock_id_name_mapping({ - 'chebi': { - '132964': 'fluazifop-P-butyl', - }, - }) - - class MyTestCase(unittest.TestCase): - def my_test(self): - with mock_id_name_mapping: - # use functions directly, or use your functions that wrap them - pyobo.get_name('chebi', '1234') - - -Installation ------------- -PyOBO can be installed from `PyPI `_ with: - -.. code-block:: sh - - $ pip install pyobo - -It can be installed in development mode from `GitHub `_ -with: - -.. code-block:: sh - - $ git clone https://github.com/pyobo/pyobo.git - $ cd pyobo - $ pip install -e . - -Curation of the Bioregistry ---------------------------- -In order to normalize references and identify resources, PyOBO uses the -`Bioregistry `_. It used to be a part of PyOBO, but has since -been externalized for more general reuse. - -At `src/pyobo/registries/metaregistry.json `_ -is the curated "metaregistry". This is a source of information that contains -all sorts of fixes for missing/wrong information in MIRIAM, OLS, and OBO Foundry; entries that don't appear in -any of them; additional synonym information for each namespace/prefix; rules for normalizing xrefs and CURIEs, etc. - -Other entries in the metaregistry: - -- The ``"remappings"->"full"`` entry is a dictionary from strings that might follow ``xref:`` - in a given OBO file that need to be completely replaced, due to incorrect formatting -- The ``"remappings"->"prefix"`` entry contains a dictionary of prefixes for xrefs that need - to be remapped. Several rules, for example, remove superfluous spaces that occur inside - CURIEs or and others address instances of the GOGO issue. -- The ``"blacklists"`` entry contains rules for throwing out malformed xrefs based on - full string, just prefix, or just suffix. - -Troubleshooting ---------------- -The OBO Foundry seems to be pretty unstable with respect to the URLs to OBO resources. If you get an error like: - -.. code-block:: - - pyobo.getters.MissingOboBuild: OBO Foundry is missing a build for: mondo - -Then you should check the corresponding page on the OBO Foundry (in this case, http://www.obofoundry.org/ontology/mondo.html) -and make update to the ``url`` entry for that namespace in the Bioregistry. - -.. |build| image:: https://github.com/pyobo/pyobo/workflows/Tests/badge.svg - :target: https://github.com/pyobo/pyobo/actions?query=workflow%3ATests - :alt: Build Status - -.. |coverage| image:: https://codecov.io/gh/pyobo/pyobo/coverage.svg?branch=master - :target: https://codecov.io/gh/pyobo/pyobo?branch=master - :alt: Coverage Status - -.. |docs| image:: http://readthedocs.org/projects/pyobo/badge/?version=latest - :target: http://pyobo.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - -.. |python_versions| image:: https://img.shields.io/pypi/pyversions/pyobo.svg - :alt: Stable Supported Python Versions - -.. |pypi_version| image:: https://img.shields.io/pypi/v/pyobo.svg - :alt: Current version on PyPI - -.. |pypi_license| image:: https://img.shields.io/pypi/l/pyobo.svg - :alt: MIT License - -.. |zenodo| image:: https://zenodo.org/badge/203449095.svg - :target: https://zenodo.org/badge/latestdoi/203449095 - :alt: Zenodo - -.. |black| image:: https://img.shields.io/badge/code%20style-black-000000.svg - :target: https://github.com/psf/black - :alt: Black Code Style - -.. 
|bioregistry| image:: https://img.shields.io/static/v1?label=Powered%20by&message=Bioregistry&color=BA274A&style=flat&logo=image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACgAAAAoCAYAAACM/rhtAAAACXBIWXMAAAEnAAABJwGNvPDMAAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAACi9JREFUWIWtmXl41MUZxz/z291sstmQO9mQG0ISwHBtOOSwgpUQhApWgUfEowKigKI81actypaqFbWPVkGFFKU0Vgs+YgvhEAoqEUESrnDlEEhCbkLYJtlkk9399Y/N/rKbzQXt96+Zed+Z9/t7Z+adeecnuA1s5yFVSGrLOAf2qTiEEYlUZKIAfYdKE7KoBLkQSc4XgkPfXxz/owmT41ZtiVtR3j94eqxQq5aDeASIvkVb12RBtt0mb5xZsvfa/5XgnqTMcI3Eq7IQjwM+7jJJo8YvNhK/qDBUOl8A7JZWWqqu01Jeg6Pd1nW4NuBjjax6eWrRruv/M8EDqTMflmXeB0Jcbb6RIRhmTCJ0ymgC0wYjadTd9nW0tWMu+In63NNU7c3FWtvgJpXrZVlakVGU8/ltEcwzGjU3miI/ABa72vwTB5K45AEi7x2PUEl9fZsHZLuDmgPHuLJpJ82lle6iTSH6mpXp+fnt/Sa4yzhbp22yfwFkgnMaBy17kPhFmQh1997qLxztNkq35XB505fINtf0iz1WvfTQ7Pxdlj4Jdnjuny5yvpEhjHh7FQOGD/YyZi4owS86HJ+QQMDpJaBf3jUXlHD21+8q0y4LDppV/vfNO7+jzV3Pa6SOac0E8I8fSPonpm7JAVR+eRhzwU/Ofj+e49tpT/HdtGXcyLvQJ8HAtCTGfmJCF2dwfpTMz4NszX/uqqdyr+xPyVwoEK+C03PGrDX4GkJ7NBJ+txH/hCgAit7cRlNxOY62dmzmZgwzJvZJUh2gI/xnRmoOHsfe3AqQ/kho0qXs+pLzLh3FgwdT54YKxLsAQq0mbf1zHuTsltZejemHJSrlgGGDPGTXc09zdM5qTi59jZbKOg+Zb1QYI95+XokEQogPDifPDnPJFQ8uCkl8FyGmACQtn4dhxp3KINX7jnHi0ZeJnT8dla8Plbu+48zzfyJ08kh8ggIACB4zlIAhsURm3EnML6eB6Fzep1a+SUt5DS2VddTs+4GQccPRhgV1kowIQRaChhMXAPxkIev/Vl+8R/HgnqTMmI4gjH/iQOIXZSqdzQUlXDB9RPyi+1DrdVx67WMursvCkDERXYxB0ROSIOKecURMG+tBzkXAhbYbZk6teNPLkwmPzUIX71wuMiw+MHx2nEJQrWIFHSdE4pIHlFDisLZxYe1HhIwfTtLK+RSu30rVnlxGvrOapOcW9DsW3vH6CgKS4zxIXlz3Fw8dSaMmcfEcV9XHYbc/DSCZMEkgFoJzY0TeO17pVL7jANbaBoauWUJlTi4VOw+T9sazBKYl0ZB/qV/kALThQRi3vOJB0lpzw0vPMONOtOHOqRcyi7bzkEqanJo3HogBMGROUrziaGundGsOsQsyUPn6UPx2NvELZxIybhinn3uLyx9uVwaW7XbqjxdQmr2X0uy93Dh+Dtlu9zCu9vdj1PsvEWwcii7OwJAXFnoRFCoVhoxJrmr0gOQWo9qBfaorXodOHq0o1x8roN3cSMyC6ZT942uQBIlL53Jl804sV6oY9/fXAGg4WcjFdZuxlFV7GNPFRzFs7VKCRiV7ejJrTa/eDr1rFKXZOQCocEyTgHQAyUdD4B2d4cF8pohg4zC0YUFU7z5C9Jy7sVvbKPtsH6GT0tCGBtFwspBTz/zRixyApbSKk8te5+aZ4l4JdUVQWpIScmQhjGocUjJCRhcTieSjURQTF89FtttpuVaLpaya8Knp1B3OQ5Zlag/nU//9cmScS6EnONrauWjazIQv3kCoVD3quUPS+uAXHU7z1SpATpEQchSA78AwD0WVnxa1XkdjURlCJRGQHMfN/EuEjk9jyr4NRN47Hltjc58Gm0sraTjZ/w3l5BLuKkZJdFzT1f5+3Sq3NZjRDNAjaX1orb2BX2wEmkA9fvGGbvW7Q+OlUu+2wlIqdx+h3dzkJVPrda5iQJ93p+DRqcQ/PhsAw8xJ6AfHdkhuIVvoEribLl/jxKOv4Gi34T8omgnb1yOk7sdTA01AiK3J6yoGgP+gaPwHOdOP6LlTlXb3mNYXAlI8da9/e0pJBZovV2BrakYzQK/I3bg0SsiiCqClqs/0wAPB6UOVo6k3+CdEETwm1aPtP+dLlLJPSKAHOYDWCoVLlYTkKAKcCU4vO7IrhErFsLVLPXZ+V0haDcN+v8xjB9strdQfPavUA0ckefRxWNuwVNS6rBRKQB44r+Lmc5f7TRAgaFQyYzb9Dv/4gd18ASQ8/gsC0zwJNJVcw97aeWmOcDtaAW6eLXZLBchTC8EhWXbW6o+cInhMipetuu9OUvTWNnwNodzx+krlvAQIGjmECV+spyH/Ak3F5QDok+OoPXicip2HiJiWTuH6rQx6eh7BxlT0STH4xUbSUl6Df/xAIqaO9bBVn3taKUuy/ZAwYZImpvx4FYjVRgQzOec9r1vK0TmrldMiIDkO45ZXegxLLrRW13P0/heQHQ4CUhIYvfElNIHOtWaztNJ4qZQBqfFKLg3OMz135rNY624ClB0tHJcomTA5ZMGnANbaBmoOHPMy5hvZebNuLCoj71frXIN0i9pDJzj24IsIlUTCo7NI3/KyQg5ArfMleEyKBzmA6r1HO8eV+dSEySEB2G3yRpwZP1c2f+n1GjB07RIlcwNoKi7j3G839EhQF2cg6fmHmbznPRKevJ/GorIedV1wtLVzJesrV9WqQtoIHRfWjreSjwGar1ZRui3Ho7PfwHBGb3jRg6S1roGeoIuNJGBIPKV/zSF31irOrn4HXAu9B1zduhtLecelQxZZ9xTtrgC342Df8IwQyaYqBMKEWo0xaw1BI4d4DNJSWcfF32fRWnuD5NWPEDZ5lIe8NDuHq1v+ha2xGdkho4szYJg1hbj501EH6OgJ5oIS8hf/oWPm5HqNrE51vdt4nC/7k+9bIIT8GYA2Ipixn5jwjQrrZsju0XT5GubTRfiEBqFPisUvOrzPPi0VdeQ9YcJ63bWmxbzphTk7XHKvA/DrlJkfAU+Bcy2N+fA3vZK0WVoxny4idOKIfn+IO7lTz7zRObWCjdMv7VnhruOV9dws9F8u4CsAS1k1J54wYS4o6arWaaS8hvLP998yuZtnisl7wuROLkdjsKzqqtfL45FjB8gzwZnIJy6dS8Jjs3p8ausvHG3tXN26mytZO5W8Rcjsbg1Qze/X45ELHY9I7wHLXG26+CgSl8zFkDGh3zdkF2S7nep9PzhzmnK3FEGwUWOwrJr6zTdeL529EnRhf3LmfCHEBkBZiNrwIAwZkwi9a5Qzh9D6dNvXYW3jZkEJ9UdOOYP
wdY/wDhGEZoDAAAAABJRU5ErkJggg==
   :target: https://github.com/biopragmatics/bioregistry
   :alt: Powered by the Bioregistry
diff --git a/docs/source/conf.py b/docs/source/conf.py
index cdb8c95b..8122d204 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+"""Sphinx configuration."""

 import os
 import re
@@ -62,13 +62,13 @@

 # The short X.Y version.
 parsed_version = re.match(
-    "(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?",
+    r"(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?",
     release,
 )
-version = parsed_version.expand("\g<major>.\g<minor>.\g<patch>")
+version = parsed_version.expand(r"\g<major>.\g<minor>.\g<patch>")
 if parsed_version.group("release"):
-    tags.add("prerelease")
+    tags.add("prerelease")  # noqa:F821

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -360,4 +360,4 @@
 autoclass_content = "both"

 if os.environ.get("READTHEDOCS", None):
-    tags.add("readthedocs")
+    tags.add("readthedocs")  # noqa:F821
diff --git a/pyproject.toml b/pyproject.toml
index ad4426a3..e5e330e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,14 +1,278 @@
 # See https://setuptools.readthedocs.io/en/latest/build_meta.html
 [build-system]
 requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta:__legacy__"
+build-backend = "setuptools.build_meta"

-[tool.black]
+[project]
+name = "pyobo"
+version = "0.10.13-dev"
+description = "A python package for handling and generating OBO"
+readme = "README.md"
+authors = [
+    { name = "Charles Tapley Hoyt", email = "cthoyt@gmail.com" }
+]
+maintainers = [
+    { name = "Charles Tapley Hoyt", email = "cthoyt@gmail.com" }
+]
+
+# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#classifiers
+# Search tags using the controlled vocabulary at https://pypi.org/classifiers
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Framework :: Pytest",
+    "Framework :: tox",
+    "Framework :: Sphinx",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3 :: Only",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+    "Topic :: Scientific/Engineering :: Chemistry",
+]
+keywords = [
+    "snekpack", # please keep this keyword to credit the cookiecutter-snekpack template
+    "cookiecutter",
+    "ontologies",
+    "biomedical ontologies",
+    "life sciences",
+    "natural sciences",
+    "bioinformatics",
+    "cheminformatics",
+    "Open Biomedical Ontologies",
+    "OBO",
+]
+
+# License Information.
+# This can be any valid SPDX identifier that can be resolved
+# with URLs like https://spdx.org/licenses/MIT
+# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license
+license = { file = "LICENSE" }
+
+requires-python = ">=3.9"
+dependencies = [
+    "obonet>=0.3.0",
+    "click",
+    "tqdm",
+    "pyyaml",
+    "pandas",
+    "requests",
+    "protmapper",
+    "more_itertools",
+    "more_click>=0.0.2",
+    "humanize",
+    "tabulate",
+    "cachier",
+    "pystow>=0.2.7",
+    "bioversions>=0.5.514",
+    "bioregistry>=0.10.20",
+    "bioontologies>=0.4.0",
+    "zenodo-client>=0.0.5",
+    "class_resolver",
+    "psycopg2-binary",
+    "pydantic>=2.0",
+    # Resource Downloaders
+    "drugbank_downloader",
+    "chembl_downloader",
+    "umls_downloader>=0.1.3",
+    "typing_extensions",
+    "rdflib",
+]
+
+[project.optional-dependencies]
+tests = [
+    "pytest",
+    "coverage",
+]
+docs = [
+    "sphinx>=8",
+    "sphinx-rtd-theme>=3.0",
+    "sphinx-click",
+    "sphinx_automodapi",
+]
+
+# See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#urls
+[project.urls]
+"Bug Tracker" = "https://github.com/biopragmatics/pyobo/issues"
+Homepage = "https://github.com/biopragmatics/pyobo"
+Repository = "https://github.com/biopragmatics/pyobo.git"
+Documentation = "https://pyobo.readthedocs.io"
+
+[tool.setuptools]
+package-dir = { "" = "src" }
+
+[tool.setuptools.packages.find]
+# this implicitly sets `packages = ":find"`
+where = ["src"] # list of folders that contain the packages (["."] by default)
+
+# See https://setuptools.pypa.io/en/latest/userguide/datafiles.html
+[tool.setuptools.package-data]
+"*" = ["*.*"]
+
+[project.scripts]
+pyobo = "pyobo.cli:main"
+
+# TODO add entrypoints
+# pyobo.xrefs =
+#    cbms2019 = pyobo.xrefdb.sources.cbms2019:get_cbms2019_xrefs_df
+#    chembl = pyobo.xrefdb.sources.chembl:get_chembl_xrefs_df
+#    compath = pyobo.xrefdb.sources.compath:get_compath_xrefs_df
+#    famplex = pyobo.xrefdb.sources.famplex:get_famplex_xrefs_df
+#    gilda = pyobo.xrefdb.sources.gilda:get_gilda_xrefs_df
+#    intact = pyobo.xrefdb.sources.intact:get_xrefs_df
+#    ncit = pyobo.xrefdb.sources.ncit:get_ncit_xrefs_df
+#    pubchem = pyobo.xrefdb.sources.pubchem:get_pubchem_mesh_df
+#    wikidata = pyobo.xrefdb.sources.wikidata:get_wikidata_xrefs_df
+#    biomappings = pyobo.xrefdb.sources.biomappings:get_biomappings_df
+
+[tool.cruft]
+skip = [
+    "**/__init__.py",
+    "tests/*"
+]
+
+# MyPy, see https://mypy.readthedocs.io/en/stable/config_file.html
+[tool.mypy]
+plugins = [
+    "pydantic.mypy",
+]
+
+# Doc8, see https://doc8.readthedocs.io/en/stable/readme.html#ini-file-usage
+[tool.doc8]
+max-line-length = 120
+
+# Pytest, see https://docs.pytest.org/en/stable/reference/customize.html#pyproject-toml
+[tool.pytest.ini_options]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+]
+
+# Coverage, see https://coverage.readthedocs.io/en/latest/config.html
+[tool.coverage.run]
+branch = true
+source = [
+    "pyobo",
+]
+omit = [
+    "tests/*",
+    "docs/*",
+]
+
+[tool.coverage.paths]
+source = [
+    "src/pyobo",
+    ".tox/*/lib/python*/site-packages/pyobo",
+]
+
+[tool.coverage.report]
+show_missing = true
+exclude_lines = [
+    "pragma: no cover",
+    "raise NotImplementedError",
+    "if __name__ == \"__main__\":",
+    "if TYPE_CHECKING:",
+    "def __str__",
+    "def __repr__",
+]
+
+[tool.ruff]
 line-length = 100
-target-version = ["py38", "py39", "py310", "py311"]
+extend-include = ["*.ipynb"]
+
+[tool.ruff.lint]
+# See https://docs.astral.sh/ruff/rules
+extend-select = [
+    "F", # pyflakes
+    "E", # pycodestyle errors
+    "W", # pycodestyle warnings
"C90", # mccabe + "I", # isort + "UP", # pyupgrade + "D", # pydocstyle + "DOC", # pydoclint + "B", # bugbear + "S", # bandit + "T20", # print + "N", # pep8 naming + "ERA", # eradicate commented out code + "NPY", # numpy checks + "RUF", # ruff rules + "C4", # comprehensions +] +ignore = [ + "D105", # Missing docstring in magic method + "E203", # Black conflicts with the following + # Everything below is TODO + "ERA001", + "S113", + "C901", + "E501", + "RUF012", + "S320", + "S314", +] + +# See https://docs.astral.sh/ruff/settings/#per-file-ignores +[tool.ruff.lint.per-file-ignores] +# Ignore security issues in the version.py, which are inconsistent +"src/pyobo/version.py" = ["S603", "S607"] +# Ignore commented out code in Sphinx configuration file +"docs/source/conf.py" = ["ERA001"] +# Prints are okay in notebooks +"notebooks/**/*.ipynb" = ["T201"] + +[tool.ruff.lint.pydocstyle] +convention = "pep257" + +[tool.ruff.lint.isort] +relative-imports-order = "closest-to-furthest" +known-third-party = [ + "tqdm", +] +known-first-party = [ + "pyobo", + "tests", +] + +[tool.ruff.format] +# see https://docs.astral.sh/ruff/settings/#format_docstring-code-format +docstring-code-format = true + +[tool.bumpversion] +current_version = "0.10.13-dev" +parse = "(?P\\d+)\\.(?P\\d+)\\.(?P\\d+)(?:-(?P[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?" +serialize = [ + "{major}.{minor}.{patch}-{release}+{build}", + "{major}.{minor}.{patch}+{build}", + "{major}.{minor}.{patch}-{release}", + "{major}.{minor}.{patch}", +] +commit = true +tag = false + +[tool.bumpversion.parts.release] +optional_value = "production" +first_value = "dev" +values = [ + "dev", + "production", +] + +[[tool.bumpversion.files]] +filename = "pyproject.toml" +search = "version = \"{current_version}\"" +replace = "version = \"{new_version}\"" + +[[tool.bumpversion.files]] +filename = "docs/source/conf.py" +search = "release = \"{current_version}\"" +replace = "release = \"{new_version}\"" -[tool.isort] -profile = "black" -multi_line_output = 3 -include_trailing_comma = true -reverse_relative = true +[[tool.bumpversion.files]] +filename = "src/pyobo/version.py" +search = "VERSION = \"{current_version}\"" +replace = "VERSION = \"{new_version}\"" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 47731e40..00000000 --- a/setup.cfg +++ /dev/null @@ -1,196 +0,0 @@ -########################## -# Setup.py Configuration # -########################## -[metadata] -name = pyobo -version = 0.10.13-dev -description = Handling and writing OBO -long_description = file: README.rst - -# URLs associated with the project -url = https://github.com/pyobo/pyobo -download_url = https://github.com/pyobo/pyobo/releases -project_urls = - Bug Tracker = https://github.com/pyobo/pyobo/issues - Source Code = https://github.com/pyobo/pyobo - -# Author information -author = Charles Tapley Hoyt -author_email = cthoyt@gmail.com -maintainer = Charles Tapley Hoyt -maintainer_email = cthoyt@gmail.com - -# License Information -license = MIT -license_files = LICENSE - -# Search tags -classifiers = - Development Status :: 4 - Beta - Environment :: Console - Intended Audience :: Developers - Intended Audience :: Science/Research - License :: OSI Approved :: MIT License - Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3.12 - Programming Language :: Python :: 3.11 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3 
:: Only - Topic :: Scientific/Engineering :: Bio-Informatics - Topic :: Scientific/Engineering :: Chemistry -keywords = - Open Biomedical Ontologies - OBO - -[options] -install_requires = - obonet>=0.3.0 - click - tqdm - pyyaml - pandas - requests - protmapper - more_itertools - more_click>=0.0.2 - humanize - tabulate - cachier - pystow>=0.2.7 - bioversions>=0.5.514 - bioregistry>=0.10.20 - bioontologies>=0.4.0 - zenodo-client>=0.0.5 - class_resolver - psycopg2-binary - pydantic>=2.0 - # Resource Downloaders - drugbank_downloader - chembl_downloader - umls_downloader>=0.1.3 - typing_extensions - -# Random options -zip_safe = false -include_package_data = True -python_requires = >=3.9 - -# Where is my code -packages = find: -package_dir = - = src - -[options.packages.find] -where = src - -[options.extras_require] -tests = - coverage - pytest -docs = - sphinx<8.0 - sphinx-rtd-theme - sphinx-click - sphinx_automodapi -web = - flask - bootstrap-flask - flask-admin - flasgger - # psutil is used in the resolver - psutil -agrovoc = - rdflib - -[options.entry_points] -console_scripts = - pyobo = pyobo.cli:main - recurify = pyobo.cli:recurify - -pyobo.xrefs = - cbms2019 = pyobo.xrefdb.sources.cbms2019:get_cbms2019_xrefs_df - chembl = pyobo.xrefdb.sources.chembl:get_chembl_xrefs_df - compath = pyobo.xrefdb.sources.compath:get_compath_xrefs_df - famplex = pyobo.xrefdb.sources.famplex:get_famplex_xrefs_df - gilda = pyobo.xrefdb.sources.gilda:get_gilda_xrefs_df - intact = pyobo.xrefdb.sources.intact:get_xrefs_df - ncit = pyobo.xrefdb.sources.ncit:get_ncit_xrefs_df - pubchem = pyobo.xrefdb.sources.pubchem:get_pubchem_mesh_df - wikidata = pyobo.xrefdb.sources.wikidata:get_wikidata_xrefs_df - biomappings = pyobo.xrefdb.sources.biomappings:get_biomappings_df - - -###################### -# Doc8 Configuration # -# (doc8.ini) # -###################### -[doc8] -max-line-length = 120 - -########################## -# Coverage Configuration # -# (.coveragerc) # -########################## -[coverage:run] -branch = True -source = pyobo -omit = - tests/* - docs/* - -[coverage:paths] -source = - src/pyobo - .tox/*/lib/python*/site-packages/pyobo - -[coverage:report] -show_missing = True -exclude_lines = - def __str__ - def __repr__ - -########################## -# Darglint Configuration # -########################## -[darglint] -docstring_style=sphinx -strictness=short - -######################### -# Flake8 Configuration # -######################### -[flake8] -ignore = - S310 -# line break before binary operator - W503 -# XML stuff - S405 - S314 -# Whitespace stuff - E203 -# Pickle stuff - S301 - S403 - # too complicated - C901 -exclude = - .tox, - .git, - __pycache__, - docs/source/conf.py, - build, - dist, - tests/fixtures/*, - *.pyc, - *.egg-info, - .cache, - .eggs -max-complexity = 25 -max-line-length = 120 -import-order-style = pycharm -application-import-names = - pyobo - tests diff --git a/src/pyobo/__init__.py b/src/pyobo/__init__.py index 85e0dd17..0891d747 100644 --- a/src/pyobo/__init__.py +++ b/src/pyobo/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """A python package for handling and generating OBO.""" from .api import ( # noqa: F401 diff --git a/src/pyobo/__main__.py b/src/pyobo/__main__.py index d7ee89cf..f213a850 100644 --- a/src/pyobo/__main__.py +++ b/src/pyobo/__main__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for PyOBO.""" from .cli import main diff --git a/src/pyobo/api/__init__.py b/src/pyobo/api/__init__.py index 3976a32b..857c2598 100644 --- a/src/pyobo/api/__init__.py 
+++ b/src/pyobo/api/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """High-level API for accessing content.""" from .alts import ( # noqa: F401 diff --git a/src/pyobo/api/alts.py b/src/pyobo/api/alts.py index 6122c7cf..f18667e1 100644 --- a/src/pyobo/api/alts.py +++ b/src/pyobo/api/alts.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """High-level API for alternative identifiers.""" import logging +from collections.abc import Mapping from functools import lru_cache -from typing import List, Mapping, Optional +from typing import Optional from .utils import get_version from ..getters import get_ontology @@ -26,11 +25,11 @@ } -@lru_cache() +@lru_cache @wrap_norm_prefix def get_id_to_alts( prefix: str, *, force: bool = False, version: Optional[str] = None -) -> Mapping[str, List[str]]: +) -> Mapping[str, list[str]]: """Get alternate identifiers.""" if prefix in NO_ALTS: return {} @@ -41,7 +40,7 @@ def get_id_to_alts( header = [f"{prefix}_id", "alt_id"] @cached_multidict(path=path, header=header, force=force) - def _get_mapping() -> Mapping[str, List[str]]: + def _get_mapping() -> Mapping[str, list[str]]: if force: logger.info(f"[{prefix}] forcing reload for alts") else: @@ -52,7 +51,7 @@ def _get_mapping() -> Mapping[str, List[str]]: return _get_mapping() -@lru_cache() +@lru_cache @wrap_norm_prefix def get_alts_to_id( prefix: str, *, force: bool = False, version: Optional[str] = None diff --git a/src/pyobo/api/hierarchy.py b/src/pyobo/api/hierarchy.py index 095cfbec..185624a6 100644 --- a/src/pyobo/api/hierarchy.py +++ b/src/pyobo/api/hierarchy.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """High-level API for hierarchies.""" import logging +from collections.abc import Iterable from functools import lru_cache -from typing import Iterable, Optional, Set, Tuple +from typing import Optional import networkx as nx @@ -70,13 +69,13 @@ def get_hierarchy( ) -@lru_cache() +@lru_cache @wrap_norm_prefix def _get_hierarchy_helper( prefix: str, *, - extra_relations: Tuple[TypeDef, ...], - properties: Tuple[str, ...], + extra_relations: tuple[TypeDef, ...], + properties: tuple[str, ...], include_part_of: bool, include_has_member: bool, use_tqdm: bool, @@ -161,13 +160,13 @@ def is_descendent( Check that go:0070246 ! natural killer cell apoptotic process is a descendant of go:0006915 ! 
apoptotic process:: - >>> assert is_descendent('go', '0070246', 'go', '0006915') + >>> assert is_descendent("go", "0070246", "go", "0006915") """ descendants = get_descendants(ancestor_prefix, ancestor_identifier, version=version) return descendants is not None and f"{prefix}:{identifier}" in descendants -@lru_cache() +@lru_cache def get_descendants( prefix: str, identifier: Optional[str] = None, @@ -176,7 +175,7 @@ def get_descendants( use_tqdm: bool = False, force: bool = False, **kwargs, -) -> Optional[Set[str]]: +) -> Optional[set[str]]: """Get all the descendants (children) of the term as CURIEs.""" curie, prefix, identifier = _pic(prefix, identifier) hierarchy = get_hierarchy( @@ -192,7 +191,7 @@ def get_descendants( return nx.ancestors(hierarchy, curie) # note this is backwards -def _pic(prefix, identifier=None) -> Tuple[str, str, str]: +def _pic(prefix, identifier=None) -> tuple[str, str, str]: if identifier is None: curie = prefix prefix, identifier = prefix.split(":") @@ -201,7 +200,7 @@ def _pic(prefix, identifier=None) -> Tuple[str, str, str]: return curie, prefix, identifier -@lru_cache() +@lru_cache def get_children( prefix: str, identifier: Optional[str] = None, @@ -210,7 +209,7 @@ def get_children( use_tqdm: bool = False, force: bool = False, **kwargs, -) -> Optional[Set[str]]: +) -> Optional[set[str]]: """Get all the descendants (children) of the term as CURIEs.""" curie, prefix, identifier = _pic(prefix, identifier) hierarchy = get_hierarchy( @@ -232,13 +231,13 @@ def has_ancestor( """Check that the first identifier has the second as an ancestor. Check that go:0008219 ! cell death is an ancestor of go:0006915 ! apoptotic process:: - >>> assert has_ancestor('go', '0006915', 'go', '0008219') + >>> assert has_ancestor("go", "0006915", "go", "0008219") """ ancestors = get_ancestors(prefix, identifier, version=version) return ancestors is not None and f"{ancestor_prefix}:{ancestor_identifier}" in ancestors -@lru_cache() +@lru_cache def get_ancestors( prefix: str, identifier: Optional[str] = None, @@ -247,7 +246,7 @@ def get_ancestors( use_tqdm: bool = False, force: bool = False, **kwargs, -) -> Optional[Set[str]]: +) -> Optional[set[str]]: """Get all the ancestors (parents) of the term as CURIEs.""" curie, prefix, identifier = _pic(prefix, identifier) hierarchy = get_hierarchy( diff --git a/src/pyobo/api/metadata.py b/src/pyobo/api/metadata.py index 641f0b02..7ff72d9d 100644 --- a/src/pyobo/api/metadata.py +++ b/src/pyobo/api/metadata.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """High-level API for metadata.""" import logging +from collections.abc import Mapping from functools import lru_cache -from typing import Mapping, Optional +from typing import Optional from .utils import get_version from ..getters import get_ontology @@ -19,7 +18,7 @@ logger = logging.getLogger(__name__) -@lru_cache() +@lru_cache @wrap_norm_prefix def get_metadata( prefix: str, *, force: bool = False, version: Optional[str] = None diff --git a/src/pyobo/api/names.py b/src/pyobo/api/names.py index f7443825..b619b9b1 100644 --- a/src/pyobo/api/names.py +++ b/src/pyobo/api/names.py @@ -1,19 +1,18 @@ -# -*- coding: utf-8 -*- - """High-level API for nomenclature.""" from __future__ import annotations import logging import subprocess +from collections.abc import Mapping from functools import lru_cache -from typing import Callable, List, Mapping, Optional, Set, TypeVar +from typing import Callable, TypeVar from curies import Reference, ReferenceTuple from .alts import get_primary_identifier from .utils 
import get_version -from ..getters import NoBuild, get_ontology +from ..getters import NoBuildError, get_ontology from ..identifier_utils import normalize_curie, wrap_norm_prefix from ..utils.cache import cached_collection, cached_mapping, cached_multidict from ..utils.path import prefix_cache_join @@ -34,7 +33,7 @@ logger = logging.getLogger(__name__) -def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[str]: +def get_name_by_curie(curie: str, *, version: str | None = None) -> str | None: """Get the name for a CURIE, if possible.""" if version is None: version = get_version(curie.split(":")[0]) @@ -46,8 +45,8 @@ def get_name_by_curie(curie: str, *, version: Optional[str] = None) -> Optional[ X = TypeVar("X") -NO_BUILD_PREFIXES: Set[str] = set() -NO_BUILD_LOGGED: Set = set() +NO_BUILD_PREFIXES: set[str] = set() +NO_BUILD_LOGGED: set = set() def _help_get( @@ -56,12 +55,12 @@ def _help_get( identifier: str, force: bool = False, strict: bool = False, - version: Optional[str] = None, -) -> Optional[X]: + version: str | None = None, +) -> X | None: """Get the result for an entity based on a mapping maker function ``f``.""" try: mapping = f(prefix, force=force, strict=strict, version=version) # type:ignore - except NoBuild: + except NoBuildError: if prefix not in NO_BUILD_PREFIXES: logger.warning("[%s] unable to look up results with %s", prefix, f) NO_BUILD_PREFIXES.add(prefix) @@ -85,22 +84,22 @@ def _help_get( @wrap_norm_prefix def get_name( prefix: str | Reference | ReferenceTuple, - identifier: Optional[str] = None, + identifier: str | None = None, /, *, - version: Optional[str] = None, -) -> Optional[str]: + version: str | None = None, +) -> str | None: """Get the name for an entity.""" if isinstance(prefix, (ReferenceTuple, Reference)): prefix, identifier = prefix.prefix, prefix.identifier return _help_get(get_id_name_mapping, prefix, identifier, version=version) # type:ignore -@lru_cache() +@lru_cache @wrap_norm_prefix def get_ids( - prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None -) -> Set[str]: + prefix: str, *, force: bool = False, strict: bool = False, version: str | None = None +) -> set[str]: """Get the set of identifiers for this prefix.""" if prefix == "ncbigene": from ..sources.ncbigene import get_ncbigene_ids @@ -115,7 +114,7 @@ def get_ids( path = prefix_cache_join(prefix, name="ids.tsv", version=version) @cached_collection(path=path, force=force) - def _get_ids() -> Set[str]: + def _get_ids() -> set[str]: if force: logger.info("[%s v%s] forcing reload for names", prefix, version) else: @@ -128,10 +127,10 @@ def _get_ids() -> Set[str]: return set(_get_ids()) -@lru_cache() +@lru_cache @wrap_norm_prefix def get_id_name_mapping( - prefix: str, *, force: bool = False, strict: bool = False, version: Optional[str] = None + prefix: str, *, force: bool = False, strict: bool = False, version: str | None = None ) -> Mapping[str, str]: """Get an identifier to name mapping for the OBO file.""" if prefix == "ncbigene": @@ -157,7 +156,7 @@ def _get_id_name_mapping() -> Mapping[str, str]: try: return _get_id_name_mapping() - except NoBuild: + except NoBuildError: logger.debug("[%s] no build", prefix) return {} except (Exception, subprocess.CalledProcessError) as e: @@ -165,10 +164,10 @@ def _get_id_name_mapping() -> Mapping[str, str]: return {} -@lru_cache() +@lru_cache @wrap_norm_prefix def get_name_id_mapping( - prefix: str, *, force: bool = False, version: Optional[str] = None + prefix: str, *, force: bool = False, 
version: str | None = None ) -> Mapping[str, str]: """Get a name to identifier mapping for the OBO file.""" id_name = get_id_name_mapping(prefix=prefix, force=force, version=version) @@ -177,8 +176,8 @@ def get_name_id_mapping( @wrap_norm_prefix def get_definition( - prefix: str, identifier: str | None = None, *, version: Optional[str] = None -) -> Optional[str]: + prefix: str, identifier: str | None = None, *, version: str | None = None +) -> str | None: """Get the definition for an entity.""" if identifier is None: prefix, _, identifier = prefix.rpartition(":") @@ -190,7 +189,7 @@ def get_id_definition_mapping( *, force: bool = False, strict: bool = False, - version: Optional[str] = None, + version: str | None = None, ) -> Mapping[str, str]: """Get a mapping of descriptions.""" if version is None: @@ -213,15 +212,15 @@ def get_obsolete( *, force: bool = False, strict: bool = False, - version: Optional[str] = None, -) -> Set[str]: + version: str | None = None, +) -> set[str]: """Get the set of obsolete local unique identifiers.""" if version is None: version = get_version(prefix) path = prefix_cache_join(prefix, name="obsolete.tsv", version=version) @cached_collection(path=path, force=force) - def _get_obsolete() -> Set[str]: + def _get_obsolete() -> set[str]: ontology = get_ontology(prefix, force=force, strict=strict, version=version) return ontology.get_obsolete() @@ -229,7 +228,7 @@ def _get_obsolete() -> Set[str]: @wrap_norm_prefix -def get_synonyms(prefix: str, identifier: str) -> Optional[List[str]]: +def get_synonyms(prefix: str, identifier: str) -> list[str] | None: """Get the synonyms for an entity.""" return _help_get(get_id_synonyms_mapping, prefix, identifier) @@ -240,15 +239,15 @@ def get_id_synonyms_mapping( *, force: bool = False, strict: bool = False, - version: Optional[str] = None, -) -> Mapping[str, List[str]]: + version: str | None = None, +) -> Mapping[str, list[str]]: """Get the OBO file and output a synonym dictionary.""" if version is None: version = get_version(prefix) path = prefix_cache_join(prefix, name="synonyms.tsv", version=version) @cached_multidict(path=path, header=[f"{prefix}_id", "synonym"], force=force) - def _get_multidict() -> Mapping[str, List[str]]: + def _get_multidict() -> Mapping[str, list[str]]: logger.info("[%s v%s] no cached synonyms found. getting from OBO loader", prefix, version) ontology = get_ontology(prefix, force=force, strict=strict, version=version) return ontology.get_id_synonyms_mapping() diff --git a/src/pyobo/api/properties.py b/src/pyobo/api/properties.py index 75ee5667..d80d0e55 100644 --- a/src/pyobo/api/properties.py +++ b/src/pyobo/api/properties.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """High-level API for properties.""" import logging import os -from typing import List, Mapping, Optional +from collections.abc import Mapping +from typing import Optional import pandas as pd @@ -105,7 +104,7 @@ def get_filtered_properties_multimapping( use_tqdm: bool = False, force: bool = False, version: Optional[str] = None, -) -> Mapping[str, List[str]]: +) -> Mapping[str, list[str]]: """Extract multiple properties for each term as a dictionary. 
:param prefix: the resource to load @@ -120,7 +119,7 @@ def get_filtered_properties_multimapping( all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version) @cached_multidict(path=path, header=[f"{prefix}_id", prop], force=force) - def _mapping_getter() -> Mapping[str, List[str]]: + def _mapping_getter() -> Mapping[str, list[str]]: if os.path.exists(all_properties_path): logger.info("[%s] loading pre-cached properties", prefix) df = pd.read_csv(all_properties_path, sep="\t") @@ -144,7 +143,7 @@ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[ :returns: The single value for the property. If multiple are expected, use :func:`get_properties` >>> import pyobo - >>> pyobo.get_property('chebi', '132964', 'http://purl.obolibrary.org/obo/chebi/smiles') + >>> pyobo.get_property("chebi", "132964", "http://purl.obolibrary.org/obo/chebi/smiles") "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F" """ filtered_properties_mapping = get_filtered_properties_mapping( @@ -153,7 +152,7 @@ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[ return filtered_properties_mapping.get(identifier) -def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[List[str]]: +def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[list[str]]: """Extract a set of properties for the given entity. :param prefix: the resource to load diff --git a/src/pyobo/api/relations.py b/src/pyobo/api/relations.py index 8472748e..3429e1df 100644 --- a/src/pyobo/api/relations.py +++ b/src/pyobo/api/relations.py @@ -1,11 +1,10 @@ -# -*- coding: utf-8 -*- - """High-level API for relations.""" import logging import os +from collections.abc import Mapping from functools import lru_cache -from typing import List, Mapping, Optional +from typing import Optional import networkx as nx import pandas as pd @@ -121,7 +120,7 @@ def get_id_multirelations_mapping( use_tqdm: bool = False, force: bool = False, version: Optional[str] = None, -) -> Mapping[str, List[Reference]]: +) -> Mapping[str, list[Reference]]: """Get the OBO file and output a synonym dictionary.""" if version is None: version = get_version(prefix) @@ -129,7 +128,7 @@ def get_id_multirelations_mapping( return ontology.get_id_multirelations_mapping(typedef=typedef, use_tqdm=use_tqdm) -@lru_cache() +@lru_cache @wrap_norm_prefix def get_relation_mapping( prefix: str, @@ -147,9 +146,9 @@ def get_relation_mapping( Example usage: get homology between HGNC and MGI: >>> import pyobo - >>> human_mapt_hgnc_id = '6893' - >>> mouse_mapt_mgi_id = '97180' - >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping('hgnc', 'ro:HOM0000017', 'mgi') + >>> human_mapt_hgnc_id = "6893" + >>> mouse_mapt_mgi_id = "97180" + >>> hgnc_mgi_orthology_mapping = pyobo.get_relation_mapping("hgnc", "ro:HOM0000017", "mgi") >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id] """ if version is None: @@ -178,9 +177,11 @@ def get_relation( Example usage: get homology between MAPT in HGNC and MGI: >>> import pyobo - >>> human_mapt_hgnc_id = '6893' - >>> mouse_mapt_mgi_id = '97180' - >>> assert mouse_mapt_mgi_id == pyobo.get_relation('hgnc', human_mapt_hgnc_id, 'ro:HOM0000017', 'mgi') + >>> human_mapt_hgnc_id = "6893" + >>> mouse_mapt_mgi_id = "97180" + >>> assert mouse_mapt_mgi_id == pyobo.get_relation( + ... "hgnc", human_mapt_hgnc_id, "ro:HOM0000017", "mgi" + ... 
) """ relation_mapping = get_relation_mapping( prefix=prefix, diff --git a/src/pyobo/api/species.py b/src/pyobo/api/species.py index 5c2d2222..14aea1bd 100644 --- a/src/pyobo/api/species.py +++ b/src/pyobo/api/species.py @@ -1,14 +1,13 @@ -# -*- coding: utf-8 -*- - """High-level API for species.""" import logging +from collections.abc import Mapping from functools import lru_cache -from typing import Mapping, Optional +from typing import Optional from .alts import get_primary_identifier from .utils import get_version -from ..getters import NoBuild, get_ontology +from ..getters import NoBuildError, get_ontology from ..identifier_utils import wrap_norm_prefix from ..utils.cache import cached_mapping from ..utils.path import prefix_cache_join @@ -29,7 +28,7 @@ def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) try: id_species = get_id_species_mapping(prefix, version=version) - except NoBuild: + except NoBuildError: logger.warning("unable to look up species for prefix %s", prefix) return None @@ -41,7 +40,7 @@ def get_species(prefix: str, identifier: str, *, version: Optional[str] = None) return id_species.get(primary_id) -@lru_cache() +@lru_cache @wrap_norm_prefix def get_id_species_mapping( prefix: str, diff --git a/src/pyobo/api/typedefs.py b/src/pyobo/api/typedefs.py index 6d390672..3fc2d638 100644 --- a/src/pyobo/api/typedefs.py +++ b/src/pyobo/api/typedefs.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """High-level API for typedefs.""" import logging @@ -21,7 +19,7 @@ logger = logging.getLogger(__name__) -@lru_cache() +@lru_cache @wrap_norm_prefix def get_typedef_df( prefix: str, *, force: bool = False, version: Optional[str] = None diff --git a/src/pyobo/api/utils.py b/src/pyobo/api/utils.py index 180d0399..a7050f48 100644 --- a/src/pyobo/api/utils.py +++ b/src/pyobo/api/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Utilities for high-level API.""" import json @@ -13,6 +11,7 @@ from ..utils.path import prefix_directory_join __all__ = [ + "safe_get_version", "get_version", "get_version_pins", "VersionError", @@ -53,6 +52,14 @@ def get_version(prefix: str) -> Optional[str]: return None +def safe_get_version(prefix: str) -> str: + """Get the version.""" + v = get_version(prefix) + if v is None: + raise ValueError + return v + + @lru_cache(1) def get_version_pins() -> dict[str, str]: """Retrieve user-defined resource version pins. 
diff --git a/src/pyobo/api/xrefs.py b/src/pyobo/api/xrefs.py index 9bfcd2e0..bd76fd95 100644 --- a/src/pyobo/api/xrefs.py +++ b/src/pyobo/api/xrefs.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """High-level API for synonyms.""" import logging +from collections.abc import Mapping from functools import lru_cache -from typing import List, Mapping, Optional, Tuple, Union +from typing import Optional, Union import pandas as pd from tqdm.auto import tqdm @@ -43,7 +42,7 @@ def get_xref( return filtered_xrefs.get(identifier) -@lru_cache() +@lru_cache @wrap_norm_prefix def get_filtered_xrefs( prefix: str, @@ -147,7 +146,7 @@ def get_sssom_df( prefix = prefix.ontology else: df = get_xrefs_df(prefix=prefix, **kwargs) - rows: List[Tuple[str, ...]] = [] + rows: list[tuple[str, ...]] = [] with logging_redirect_tqdm(): for source_id, target_prefix, target_id in tqdm( df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM" diff --git a/src/pyobo/aws.py b/src/pyobo/aws.py index ac9471fc..0d48bf4a 100644 --- a/src/pyobo/aws.py +++ b/src/pyobo/aws.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Interface for caching data on AWS S3.""" import logging import os -from typing import Optional, Set +from typing import Optional import boto3 import humanize @@ -57,8 +55,8 @@ def download_artifacts(bucket: str, suffix: Optional[str] = None) -> None: def upload_artifacts( bucket: str, - whitelist: Optional[Set[str]] = None, - blacklist: Optional[Set[str]] = None, + whitelist: Optional[set[str]] = None, + blacklist: Optional[set[str]] = None, s3_client=None, ) -> None: """Upload all artifacts to AWS.""" diff --git a/src/pyobo/cli/__init__.py b/src/pyobo/cli/__init__.py index 371258de..fd9c251a 100644 --- a/src/pyobo/cli/__init__.py +++ b/src/pyobo/cli/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for PyOBO.""" from .cli import main diff --git a/src/pyobo/cli/aws.py b/src/pyobo/cli/aws.py index 9a7ae7bb..00f8b081 100644 --- a/src/pyobo/cli/aws.py +++ b/src/pyobo/cli/aws.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for PyOBO's interface to S3.""" import click diff --git a/src/pyobo/cli/cli.py b/src/pyobo/cli/cli.py index 0f9b4455..51deb620 100644 --- a/src/pyobo/cli/cli.py +++ b/src/pyobo/cli/cli.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for PyOBO.""" import logging diff --git a/src/pyobo/cli/database.py b/src/pyobo/cli/database.py index b040941b..b4391962 100644 --- a/src/pyobo/cli/database.py +++ b/src/pyobo/cli/database.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for PyOBO Database Generation.""" import logging @@ -317,7 +315,7 @@ def properties(directory: str, zenodo: bool, force: bool, no_strict: bool): @zenodo_option @force_option @no_strict_option -def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool): # noqa: D202 +def xrefs(directory: str, zenodo: bool, force: bool, no_strict: bool): """Make the prefix-identifier-xref dump.""" with logging_redirect_tqdm(): paths = db_output_helper( diff --git a/src/pyobo/cli/lookup.py b/src/pyobo/cli/lookup.py index cf2f2b10..50ef537c 100644 --- a/src/pyobo/cli/lookup.py +++ b/src/pyobo/cli/lookup.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for PyOBO lookups.""" import json diff --git a/src/pyobo/cli/utils.py b/src/pyobo/cli/utils.py index 52188ec4..b1d62f61 100644 --- a/src/pyobo/cli/utils.py +++ b/src/pyobo/cli/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Utilities for the CLI.""" import datetime diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index 16e6d4e1..ad3e81d2 
100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Constants for PyOBO.""" import logging diff --git a/src/pyobo/getters.py b/src/pyobo/getters.py index 53dc8392..69d3b4ef 100644 --- a/src/pyobo/getters.py +++ b/src/pyobo/getters.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Utilities for OBO files.""" import datetime @@ -11,16 +9,11 @@ import typing import urllib.error from collections import Counter +from collections.abc import Iterable, Mapping, Sequence from pathlib import Path from typing import ( Callable, - Iterable, - List, - Mapping, Optional, - Sequence, - Set, - Tuple, TypeVar, Union, ) @@ -30,7 +23,7 @@ from tqdm.auto import tqdm from .constants import DATABASE_DIRECTORY -from .identifier_utils import MissingPrefix, wrap_norm_prefix +from .identifier_utils import MissingPrefixError, wrap_norm_prefix from .plugins import has_nomenclature_plugin, run_nomenclature_plugin from .struct import Obo from .utils.io import get_writer @@ -39,17 +32,17 @@ __all__ = [ "get_ontology", - "NoBuild", + "NoBuildError", ] logger = logging.getLogger(__name__) -class NoBuild(RuntimeError): +class NoBuildError(RuntimeError): """Base exception for being unable to build.""" -class UnhandledFormat(NoBuild): +class UnhandledFormatError(NoBuildError): """Only OWL is available.""" @@ -117,7 +110,7 @@ def get_ontology( ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version) if path is None: - raise NoBuild(prefix) + raise NoBuildError(prefix) elif ontology_format == "obo": pass # all gucci elif ontology_format == "owl": @@ -127,7 +120,7 @@ def get_ontology( robot.convert(path, _converted_obo_path, check=robot_check) path = _converted_obo_path else: - raise UnhandledFormat(f"[{prefix}] unhandled ontology file format: {path.suffix}") + raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}") from .reader import from_obo_path @@ -147,8 +140,8 @@ def get_ontology( def _ensure_ontology_path( prefix: str, force, version -) -> Union[Tuple[str, Path], Tuple[None, None]]: - for ontology_format, url in [ # noqa:B007 +) -> Union[tuple[str, Path], tuple[None, None]]: + for ontology_format, url in [ ("obo", bioregistry.get_obo_download(prefix)), ("owl", bioregistry.get_owl_download(prefix)), ("json", bioregistry.get_json_download(prefix)), @@ -246,7 +239,7 @@ def iter_helper( leave: bool = False, strict: bool = True, **kwargs, -) -> Iterable[Tuple[str, str, X]]: +) -> Iterable[tuple[str, str, X]]: """Yield all mappings extracted from each database given.""" for prefix, mapping in iter_helper_helper(f, strict=strict, **kwargs): it = tqdm( @@ -266,7 +259,7 @@ def _prefixes( skip_below: Optional[str] = None, skip_below_inclusive: bool = True, skip_pyobo: bool = False, - skip_set: Optional[Set[str]] = None, + skip_set: Optional[set[str]] = None, ) -> Iterable[str]: for prefix, resource in sorted(bioregistry.read_registry().items()): if resource.no_own_terms: @@ -299,10 +292,10 @@ def iter_helper_helper( skip_below: Optional[str] = None, skip_below_inclusive: bool = True, skip_pyobo: bool = False, - skip_set: Optional[Set[str]] = None, + skip_set: Optional[set[str]] = None, strict: bool = True, **kwargs, -) -> Iterable[Tuple[str, X]]: +) -> Iterable[tuple[str, X]]: """Yield all mappings extracted from each database given. :param f: A function that takes a prefix and gives back something that will be used by an outer function. 
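Since ``NoBuild`` → ``NoBuildError`` is a caller-facing rename, downstream code that catches the exception needs updating as well. A minimal sketch of the new calling convention (the prefix is only illustrative; both names appear in the ``__all__`` above):

.. code-block:: python

    from pyobo.getters import NoBuildError, get_ontology

    try:
        ontology = get_ontology("chebi")
    except NoBuildError:
        # raised when no OBO/OWL/JSON artifact can be located for the prefix
        ontology = None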
@@ -342,13 +335,13 @@ def iter_helper_helper( logger.warning("[%s] unable to download", prefix) if strict and not bioregistry.is_deprecated(prefix): raise - except MissingPrefix as e: + except MissingPrefixError as e: logger.warning("[%s] missing prefix: %s", prefix, e) if strict and not bioregistry.is_deprecated(prefix): raise e except subprocess.CalledProcessError: logger.warning("[%s] ROBOT was unable to convert OWL to OBO", prefix) - except UnhandledFormat as e: + except UnhandledFormatError as e: logger.warning("[%s] %s", prefix, e) except ValueError as e: if _is_xml(e): @@ -390,7 +383,7 @@ def _prep_dir(directory: Union[None, str, pathlib.Path]) -> pathlib.Path: def db_output_helper( - f: Callable[..., Iterable[Tuple[str, ...]]], + f: Callable[..., Iterable[tuple[str, ...]]], db_name: str, columns: Sequence[str], *, @@ -399,7 +392,7 @@ def db_output_helper( use_gzip: bool = True, summary_detailed: Optional[Sequence[int]] = None, **kwargs, -) -> List[pathlib.Path]: +) -> list[pathlib.Path]: """Help output database builds. :param f: A function that takes a prefix and gives back something that will be used by an outer function. @@ -413,7 +406,7 @@ def db_output_helper( directory = _prep_dir(directory) c: typing.Counter[str] = Counter() - c_detailed: typing.Counter[Tuple[str, ...]] = Counter() + c_detailed: typing.Counter[tuple[str, ...]] = Counter() if use_gzip: db_path = directory.joinpath(f"{db_name}.tsv.gz") @@ -475,7 +468,7 @@ def db_output_helper( indent=2, ) - rv: List[pathlib.Path] = [ + rv: list[pathlib.Path] = [ db_metadata_path, db_path, db_sample_path, diff --git a/src/pyobo/gilda_utils.py b/src/pyobo/gilda_utils.py index e01cae99..91b1db6d 100644 --- a/src/pyobo/gilda_utils.py +++ b/src/pyobo/gilda_utils.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """PyOBO's Gilda utilities.""" import logging +from collections.abc import Iterable from subprocess import CalledProcessError -from typing import Iterable, List, Optional, Tuple, Type, Union +from typing import Optional, Union import bioregistry import gilda.api @@ -22,7 +21,7 @@ get_ids, get_obsolete, ) -from pyobo.getters import NoBuild +from pyobo.getters import NoBuildError from pyobo.utils.io import multidict __all__ = [ @@ -41,7 +40,7 @@ def iter_gilda_prediction_tuples( grounder: Optional[Grounder] = None, identifiers_are_names: bool = False, strict: bool = False, -) -> Iterable[Tuple[str, str, str, str, str, str, str, str, float]]: +) -> Iterable[tuple[str, str, str, str, str, str, str, str, float]]: """Iterate over prediction tuples for a given prefix.""" if grounder is None: grounder = gilda.api.grounder @@ -94,7 +93,7 @@ def get_grounder( prefixes: Union[str, Iterable[str]], *, unnamed: Optional[Iterable[str]] = None, - grounder_cls: Optional[Type[Grounder]] = None, + grounder_cls: Optional[type[Grounder]] = None, versions: Union[None, str, Iterable[Union[str, None]]] = None, strict: bool = True, skip_obsolete: bool = False, @@ -115,7 +114,7 @@ def get_grounder( if len(prefixes) != len(versions): raise ValueError - terms: List[gilda.term.Term] = [] + terms: list[gilda.term.Term] = [] for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions): try: p_terms = list( @@ -128,7 +127,7 @@ def get_grounder( progress=progress, ) ) - except (NoBuild, CalledProcessError): + except (NoBuildError, CalledProcessError): continue else: terms.extend(p_terms) @@ -251,7 +250,7 @@ def get_gilda_terms( def get_gilda_term_subset( - source: str, ancestors: Union[str, List[str]], **kwargs + source: str, 
ancestors: Union[str, list[str]], **kwargs ) -> Iterable[gilda.term.Term]: """Get a subset of terms.""" subset = { @@ -264,7 +263,7 @@ def get_gilda_term_subset( yield term -def _ensure_list(s: Union[str, List[str]]) -> List[str]: +def _ensure_list(s: Union[str, list[str]]) -> list[str]: if isinstance(s, str): return [s] return s diff --git a/src/pyobo/identifier_utils.py b/src/pyobo/identifier_utils.py index c2d3de0e..35f7dbdb 100644 --- a/src/pyobo/identifier_utils.py +++ b/src/pyobo/identifier_utils.py @@ -1,12 +1,9 @@ -# -*- coding: utf-8 -*- - """Utilities for handling prefixes.""" from __future__ import annotations import logging from functools import wraps -from typing import Optional, Tuple, Union import bioregistry from curies import Reference, ReferenceTuple @@ -28,10 +25,11 @@ logger = logging.getLogger(__name__) -class MissingPrefix(ValueError): +class MissingPrefixError(ValueError): """Raised on a missing prefix.""" - def __init__(self, prefix, curie, xref=None, ontology=None): + def __init__(self, prefix: str, curie: str, xref: str | None = None, ontology: str | None = None): + """Initialize the error.""" self.prefix = prefix self.curie = curie self.xref = xref @@ -50,13 +48,13 @@ def __str__(self) -> str: return s -def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> Optional[str]: +def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True) -> str | None: """Normalize a namespace and return, if possible.""" norm_prefix = bioregistry.normalize_prefix(prefix) if norm_prefix is not None: return norm_prefix elif strict: - raise MissingPrefix(prefix=prefix, curie=curie, xref=xref) + raise MissingPrefixError(prefix=prefix, curie=curie, xref=xref) else: return None @@ -64,9 +62,7 @@ def _normalize_prefix(prefix: str, *, curie=None, xref=None, strict: bool = True BAD_CURIES = set() -def normalize_curie( - curie: str, *, strict: bool = True -) -> Union[Tuple[str, str], Tuple[None, None]]: +def normalize_curie(curie: str, *, strict: bool = True) -> tuple[str, str] | tuple[None, None]: """Parse a string that looks like a CURIE. :param curie: A compact uniform resource identifier (CURIE) diff --git a/src/pyobo/mocks.py b/src/pyobo/mocks.py index 958f59cc..959edb30 100644 --- a/src/pyobo/mocks.py +++ b/src/pyobo/mocks.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- - """Mocks for PyOBO.""" -from typing import List, Mapping, Optional, Tuple, TypeVar, Union +from collections.abc import Mapping +from typing import Optional, TypeVar, Union from unittest import mock import pandas as pd @@ -25,7 +24,7 @@ def get_mock_id_name_mapping(data: Mapping[str, Mapping[str, str]]) -> mock._pat return _replace_mapping_getter("pyobo.api.names.get_id_name_mapping", data) -def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> mock._patch: +def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch: """Mock the :func:`pyobo.extract.get_id_synonyms_mapping` function. :param data: A mapping from prefix to mappings of identifier to lists of synonyms. @@ -33,7 +32,7 @@ def get_mock_id_synonyms_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> return _replace_mapping_getter("pyobo.api.names.get_id_synonyms_mapping", data) -def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, List[str]]]) -> mock._patch: +def get_mock_id_alts_mapping(data: Mapping[str, Mapping[str, list[str]]]) -> mock._patch: """Mock the :func:`pyobo.extract.get_id_to_alts` function.
:param data: A mapping from prefix to mappings of identifier to lists of alternative identifiers. @@ -52,7 +51,7 @@ def _mock_get_data(prefix: str, **_kwargs) -> Mapping[str, X]: def get_mock_get_xrefs_df( - df: Union[List[Tuple[str, str, str, str, str]], pd.DataFrame] + df: Union[list[tuple[str, str, str, str, str]], pd.DataFrame], ) -> mock._patch: """Mock the :func:`pyobo.xrefsdb.xrefs_pipeline.get_xref_df` function. diff --git a/src/pyobo/normalizer.py b/src/pyobo/normalizer.py index a8527c57..9e807df4 100644 --- a/src/pyobo/normalizer.py +++ b/src/pyobo/normalizer.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """Use synonyms from OBO to normalize names.""" import logging from abc import ABC, abstractmethod +from collections.abc import Iterable, Mapping from dataclasses import dataclass from functools import lru_cache -from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union +from typing import Optional, Union import bioregistry @@ -23,29 +22,29 @@ logger = logging.getLogger(__name__) -NormalizationSuccess = Tuple[str, str, str] -NormalizationFailure = Tuple[None, None, str] +NormalizationSuccess = tuple[str, str, str] +NormalizationFailure = tuple[None, None, str] NormalizationResult = Union[NormalizationSuccess, NormalizationFailure] class Normalizer(ABC): """A normalizer.""" - id_to_name: Dict[str, str] - id_to_synonyms: Dict[str, List[str]] + id_to_name: dict[str, str] + id_to_synonyms: dict[str, list[str]] #: A mapping from all synonyms to the set of identifiers that they point to. #: In a perfect world, each would only be a single element. - synonym_to_identifiers_mapping: Dict[str, Set[str]] + synonym_to_identifiers_mapping: dict[str, set[str]] #: A mapping from normalized names to the actual ones that they came from - norm_name_to_name: Dict[str, Set[str]] + norm_name_to_name: dict[str, set[str]] def __init__( self, - id_to_name: Dict[str, str], - id_to_synonyms: Dict[str, List[str]], + id_to_name: dict[str, str], + id_to_synonyms: dict[str, list[str]], remove_prefix: Optional[str] = None, - ) -> None: # noqa: D107 + ) -> None: """Initialize the normalizer. :param id_to_name: An identifier to name dictionary. 
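For orientation, the ``Normalizer`` machinery above is usually reached through the cached ``get_normalizer`` factory defined further down in this file. A rough sketch (the prefix and query are only illustrative; the result follows the ``NormalizationResult`` alias above):

.. code-block:: python

    from pyobo.normalizer import get_normalizer

    normalizer = get_normalizer("chebi")
    prefix, identifier, name = normalizer.normalize("water")
    # on failure, the first two elements are None, per NormalizationFailure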
@@ -64,7 +63,7 @@ def __init__( self.norm_name_to_name = self._get_norm_name_to_names(self.synonym_to_identifiers_mapping) @classmethod - def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> Dict[str, Set[str]]: + def _get_norm_name_to_names(cls, synonyms: Iterable[str]) -> dict[str, set[str]]: return multisetdict((cls._normalize_text(synonym), synonym) for synonym in synonyms) @staticmethod @@ -81,7 +80,7 @@ def _iterate_synonyms_to_identifiers( id_to_name: Mapping[str, str], id_to_synonyms: Mapping[str, Iterable[str]], remove_prefix: Optional[str] = None, - ) -> Iterable[Tuple[str, str]]: + ) -> Iterable[tuple[str, str]]: if remove_prefix is not None: remove_prefix = f'{remove_prefix.lower().rstrip(":")}:' @@ -101,7 +100,7 @@ def _iterate_synonyms_to_identifiers( # it might overwrite but this is probably always due to alternate ids yield synonym, identifier - def get_names(self, query: str) -> List[str]: + def get_names(self, query: str) -> list[str]: """Get all names to which the query text maps.""" norm_text = self._normalize_text(query) return list(self.norm_name_to_name.get(norm_text, [])) @@ -112,7 +111,7 @@ def normalize(self, query: str) -> NormalizationResult: raise NotImplementedError -@lru_cache() +@lru_cache def get_normalizer(prefix: str) -> Normalizer: """Get an OBO normalizer.""" norm_prefix = bioregistry.normalize_prefix(prefix) @@ -150,6 +149,7 @@ class OboNormalizer(Normalizer): """A utility for normalizing by names.""" def __init__(self, prefix: str) -> None: # noqa: D107 + """Initialize the normalizer by an ontology's Bioregistry prefix.""" self.prefix = prefix self._len_prefix = len(prefix) id_to_name = names.get_id_name_mapping(prefix) @@ -160,7 +160,7 @@ def __init__(self, prefix: str) -> None: # noqa: D107 remove_prefix=prefix, ) - def __repr__(self) -> str: # noqa: D105 + def __repr__(self) -> str: return f'OboNormalizer(prefix="{self.prefix}")' def normalize(self, query: str) -> NormalizationResult: @@ -188,20 +188,20 @@ class MultiNormalizer: If you're looking for taxa of exotic plants, you might use: >>> from pyobo.normalizer import MultiNormalizer - >>> normalizer = MultiNormalizer(prefixes=['ncbitaxon', 'itis']) - >>> normalizer.normalize('Homo sapiens') + >>> normalizer = MultiNormalizer(prefixes=["ncbitaxon", "itis"]) + >>> normalizer.normalize("Homo sapiens") ('ncbitaxon', '9606', 'Homo sapiens') - >>> normalizer.normalize('Abies bifolia') # variety not listed in NCBI + >>> normalizer.normalize("Abies bifolia") # variety not listed in NCBI ('itis', '507501', 'Abies bifolia') - >>> normalizer.normalize('vulcan') # nice try, nerds + >>> normalizer.normalize("vulcan") # nice try, nerds (None, None, None) """ #: The normalizers for each prefix - normalizers: List[Normalizer] + normalizers: list[Normalizer] @staticmethod - def from_prefixes(prefixes: List[str]) -> "MultiNormalizer": + def from_prefixes(prefixes: list[str]) -> "MultiNormalizer": """Instantiate normalizers based on the given prefixes, in preferred order.""" return MultiNormalizer([get_normalizer(prefix) for prefix in prefixes]) diff --git a/src/pyobo/obographs.py b/src/pyobo/obographs.py index 681b5a68..c782d826 100644 --- a/src/pyobo/obographs.py +++ b/src/pyobo/obographs.py @@ -1,6 +1,6 @@ """Convert PyOBO into OBO Graph.""" -from typing import Iterable, List +from collections.abc import Iterable import bioregistry import curies @@ -35,8 +35,8 @@ def parse_results_from_obo(obo: Obo) -> ParseResults: def graph_from_obo(obo: Obo) -> Graph: """Get an OBO Graph object from a PyOBO
object.""" - nodes: List[Node] = [] - edges: List[Edge] = [] + nodes: list[Node] = [] + edges: list[Edge] = [] for term in obo: nodes.append(_get_class_node(term)) edges.extend(_iter_edges(term)) diff --git a/src/pyobo/plugins.py b/src/pyobo/plugins.py index d466c70d..a05e1a52 100644 --- a/src/pyobo/plugins.py +++ b/src/pyobo/plugins.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Tools for loading entry points.""" +from collections.abc import Iterable, Mapping from functools import lru_cache -from typing import Callable, Iterable, Mapping, Optional +from typing import Callable, Optional from .struct import Obo @@ -14,7 +13,7 @@ ] -@lru_cache() +@lru_cache def _get_nomenclature_plugins() -> Mapping[str, Callable[[], Obo]]: from .sources import ontology_resolver diff --git a/src/pyobo/py.typed b/src/pyobo/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 5be5d941..b296cc5b 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -1,11 +1,10 @@ -# -*- coding: utf-8 -*- - """OBO Readers.""" import logging +from collections.abc import Iterable, Mapping from datetime import datetime from pathlib import Path -from typing import Any, Iterable, List, Mapping, Optional, Tuple, Union +from typing import Any, Optional, Union import bioregistry import networkx as nx @@ -13,7 +12,7 @@ from tqdm.auto import tqdm from .constants import DATE_FORMAT, PROVENANCE_PREFIXES -from .identifier_utils import MissingPrefix, normalize_curie +from .identifier_utils import MissingPrefixError, normalize_curie from .registries import curie_has_blacklisted_prefix, curie_is_blacklisted, remap_prefix from .struct import ( Obo, @@ -39,7 +38,7 @@ # FIXME use bioontologies # RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load() -RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = { +RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = { "part_of": part_of.pair, "has_part": has_part.pair, "develops_from": develops_from.pair, @@ -75,7 +74,7 @@ def from_obo_path( return from_obonet(graph, strict=strict, **kwargs) -def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noqa:C901 +def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": """Get all of the terms from a OBO graph.""" _ontology = graph.graph["ontology"] ontology = bioregistry.normalize_prefix(_ontology) # probably always okay @@ -126,12 +125,12 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq ) for prefix, identifier, data in _iter_obo_graph(graph=graph, strict=strict) ) - references: Mapping[Tuple[str, str], Reference] = { + references: Mapping[tuple[str, str], Reference] = { reference.pair: reference for reference in reference_it } #: CURIEs to typedefs - typedefs: Mapping[Tuple[str, str], TypeDef] = { + typedefs: Mapping[tuple[str, str], TypeDef] = { typedef.pair: typedef for typedef in iterate_graph_typedefs(graph, ontology) } @@ -152,7 +151,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq try: node_xrefs = list(iterate_node_xrefs(prefix=prefix, data=data, strict=strict)) - except MissingPrefix as e: + except MissingPrefixError as e: e.reference = reference raise e xrefs, provenance = [], [] @@ -171,7 +170,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq try: alt_ids = list(iterate_node_alt_ids(data, strict=strict)) - except MissingPrefix as e: + except MissingPrefixError as e: e.reference = reference raise e n_alt_ids += 
len(alt_ids) @@ -185,7 +184,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq strict=strict, ) ) - except MissingPrefix as e: + except MissingPrefixError as e: e.reference = reference raise e n_parents += len(parents) @@ -220,7 +219,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo": # noq strict=strict, ) ) - except MissingPrefix as e: + except MissingPrefixError as e: e.reference = reference raise e for relation, reference in relations_references: @@ -278,7 +277,7 @@ def _iter_obo_graph( graph: nx.MultiDiGraph, *, strict: bool = True, -) -> Iterable[Tuple[str, str, Mapping[str, Any]]]: +) -> Iterable[tuple[str, str, Mapping[str, Any]]]: """Iterate over the nodes in the graph with the prefix stripped (if it's there).""" for node, data in graph.nodes(data=True): prefix, identifier = normalize_curie(node, strict=strict) @@ -366,7 +365,8 @@ def iterate_graph_typedefs( def get_definition( data, *, prefix: str, identifier: str -) -> Union[Tuple[None, None], Tuple[str, List[Reference]]]: +) -> Union[tuple[None, None], tuple[str, list[Reference]]]: + """Extract the definition from the data.""" definition = data.get("def") # it's allowed not to have a definition if not definition: return None, None @@ -379,7 +379,7 @@ def _extract_definition( prefix: str, identifier: str, strict: bool = False, -) -> Union[Tuple[None, None], Tuple[str, List[Reference]]]: +) -> Union[tuple[None, None], tuple[str, list[Reference]]]: """Extract the definitions.""" if not s.startswith('"'): raise ValueError("definition does not start with a quote") @@ -405,7 +405,7 @@ def _get_first_nonquoted(s: str) -> Optional[int]: return None -def _quote_split(s: str) -> Tuple[str, str]: +def _quote_split(s: str) -> tuple[str, str]: s = s.lstrip('"') i = _get_first_nonquoted(s) if i is None: @@ -416,9 +416,7 @@ def _quote_split(s: str) -> Tuple[str, str]: def _clean_definition(s: str) -> str: # if '\t' in s: # logger.warning('has tab') - return ( - s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "") # noqa:W605 - ) + return s.replace('\\"', '"').replace("\n", " ").replace("\t", " ").replace(r"\d", "") def _extract_synonym( @@ -516,7 +514,7 @@ def iterate_node_synonyms( def iterate_node_properties( data: Mapping[str, Any], *, property_prefix: Optional[str] = None, term=None -) -> Iterable[Tuple[str, str]]: +) -> Iterable[tuple[str, str]]: """Extract properties from a :mod:`obonet` node's data.""" for prop_value_type in data.get("property_value", []): try: @@ -568,7 +566,7 @@ def iterate_node_relationships( prefix: str, identifier: str, strict: bool = True, -) -> Iterable[Tuple[Reference, Reference]]: +) -> Iterable[tuple[Reference, Reference]]: """Extract relationships from a :mod:`obonet` node's data.""" for s in data.get("relationship", []): relation_curie, target_curie = s.split(" ") diff --git a/src/pyobo/registries/__init__.py b/src/pyobo/registries/__init__.py index 8abead31..928e1e5d 100644 --- a/src/pyobo/registries/__init__.py +++ b/src/pyobo/registries/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Extract registry information.""" from .metaregistry import ( # noqa: F401 diff --git a/src/pyobo/registries/metaregistry.py b/src/pyobo/registries/metaregistry.py index 72662056..fd6e4e66 100644 --- a/src/pyobo/registries/metaregistry.py +++ b/src/pyobo/registries/metaregistry.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - """Load the manually curated metaregistry.""" import itertools as itt import json import os +from 
collections.abc import Iterable, Mapping from functools import lru_cache from pathlib import Path -from typing import Iterable, Mapping, Set, Tuple import bioregistry @@ -25,7 +23,7 @@ def has_no_download(prefix: str) -> bool: @lru_cache(maxsize=1) -def _no_download() -> Set[str]: +def _no_download() -> set[str]: """Get the list of prefixes not available as OBO.""" return { prefix @@ -41,7 +39,7 @@ def curie_has_blacklisted_prefix(curie: str) -> bool: @lru_cache(maxsize=1) -def get_xrefs_prefix_blacklist() -> Set[str]: +def get_xrefs_prefix_blacklist() -> set[str]: """Get the set of blacklisted xref prefixes.""" #: Xrefs starting with these prefixes will be ignored prefixes = set( @@ -65,7 +63,7 @@ def curie_has_blacklisted_suffix(curie: str) -> bool: @lru_cache(maxsize=1) -def get_xrefs_suffix_blacklist() -> Set[str]: +def get_xrefs_suffix_blacklist() -> set[str]: """Get the set of blacklisted xref suffixes.""" #: Xrefs ending with these suffixes will be ignored return set(CURATED_REGISTRY["blacklists"]["suffix"]) @@ -77,7 +75,7 @@ def curie_is_blacklisted(curie: str) -> bool: @lru_cache(maxsize=1) -def get_xrefs_blacklist() -> Set[str]: +def get_xrefs_blacklist() -> set[str]: """Get the set of blacklisted xrefs.""" rv = set() for x in CURATED_REGISTRY["blacklists"]["full"]: @@ -123,7 +121,7 @@ def remap_prefix(curie: str) -> str: return curie -def iter_cached_obo() -> Iterable[Tuple[str, str]]: +def iter_cached_obo() -> Iterable[tuple[str, str]]: """Iterate over cached OBO paths.""" for prefix in os.listdir(RAW_DIRECTORY): if prefix in GLOBAL_SKIP or has_no_download(prefix) or bioregistry.is_deprecated(prefix): diff --git a/src/pyobo/resource_utils.py b/src/pyobo/resource_utils.py index 1d696f18..f686a9d6 100644 --- a/src/pyobo/resource_utils.py +++ b/src/pyobo/resource_utils.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Resource utilities for PyOBO.""" +from collections.abc import Sequence from functools import lru_cache -from typing import Sequence import click import pandas as pd diff --git a/src/pyobo/resources/__init__.py b/src/pyobo/resources/__init__.py index e585b1d6..4a3091e8 100644 --- a/src/pyobo/resources/__init__.py +++ b/src/pyobo/resources/__init__.py @@ -1,3 +1 @@ -# -*- coding: utf-8 -*- - """Pre-cached resources for PyOBO.""" diff --git a/src/pyobo/resources/ncbitaxon.py b/src/pyobo/resources/ncbitaxon.py index a9308bfc..ba634965 100644 --- a/src/pyobo/resources/ncbitaxon.py +++ b/src/pyobo/resources/ncbitaxon.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """Loading of the NCBI Taxonomy names.""" import csv import gzip +from collections.abc import Mapping from functools import lru_cache from pathlib import Path -from typing import Mapping, Optional, Union +from typing import Optional, Union import requests diff --git a/src/pyobo/resources/ro.py b/src/pyobo/resources/ro.py index dd3fa322..f204a0a6 100644 --- a/src/pyobo/resources/ro.py +++ b/src/pyobo/resources/ro.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- - """Loading of the relations ontology names.""" import csv import os +from collections.abc import Mapping from functools import lru_cache -from typing import Mapping, Tuple import requests @@ -20,7 +18,7 @@ @lru_cache(maxsize=1) -def load_ro() -> Mapping[Tuple[str, str], str]: +def load_ro() -> Mapping[tuple[str, str], str]: """Load the relation ontology names.""" if not os.path.exists(PATH): download() diff --git a/src/pyobo/sources/__init__.py b/src/pyobo/sources/__init__.py index 0af3484e..8d902b1a 100644 --- a/src/pyobo/sources/__init__.py +++ 
b/src/pyobo/sources/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Sources of OBO content.""" from class_resolver import ClassResolver diff --git a/src/pyobo/sources/agrovoc.py b/src/pyobo/sources/agrovoc.py index b5d93f7f..bf37e7f3 100644 --- a/src/pyobo/sources/agrovoc.py +++ b/src/pyobo/sources/agrovoc.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Converter for AGROVOC.""" import pystow diff --git a/src/pyobo/sources/antibodyregistry.py b/src/pyobo/sources/antibodyregistry.py index 20b8b229..9ff96d4d 100644 --- a/src/pyobo/sources/antibodyregistry.py +++ b/src/pyobo/sources/antibodyregistry.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for the Antibody Registry.""" import logging -from typing import Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Optional import pandas as pd from bioregistry.utils import removeprefix diff --git a/src/pyobo/sources/biogrid.py b/src/pyobo/sources/biogrid.py index 2843fbd3..ec965c20 100644 --- a/src/pyobo/sources/biogrid.py +++ b/src/pyobo/sources/biogrid.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Extract and convert BioGRID identifiers.""" +from collections.abc import Mapping from functools import partial -from typing import Mapping, Optional +from typing import Optional import pandas as pd @@ -77,7 +76,8 @@ def get_ncbigene_mapping() -> Mapping[str, str]: .. code-block:: python from pyobo import get_filtered_xrefs - biogrid_ncbigene_mapping = get_filtered_xrefs('biogrid', 'ncbigene') + + biogrid_ncbigene_mapping = get_filtered_xrefs("biogrid", "ncbigene") """ df = get_df() df = df.loc[df["IDENTIFIER_TYPE"] == "ENTREZ_GENE", ["BIOGRID_ID", "IDENTIFIER_VALUE"]] diff --git a/src/pyobo/sources/ccle.py b/src/pyobo/sources/ccle.py index 1c0612d7..f43f3a69 100644 --- a/src/pyobo/sources/ccle.py +++ b/src/pyobo/sources/ccle.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Get the CCLE Cells, provided by cBioPortal.""" import tarfile +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional +from typing import Optional import pandas as pd import pystow @@ -25,7 +24,7 @@ class CCLEGetter(Obo): ontology = bioregistry_key = PREFIX - def __post_init__(self): # noqa: D105 + def __post_init__(self): self.data_version = VERSION def iter_terms(self, force: bool = False) -> Iterable[Term]: diff --git a/src/pyobo/sources/cgnc.py b/src/pyobo/sources/cgnc.py index 8a11f960..4a6ebefb 100644 --- a/src/pyobo/sources/cgnc.py +++ b/src/pyobo/sources/cgnc.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for CGNC.""" import logging -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/chebi.py b/src/pyobo/sources/chebi.py index f16dc002..0d9a4fdc 100644 --- a/src/pyobo/sources/chebi.py +++ b/src/pyobo/sources/chebi.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Converter for ChEBI.""" -from typing import Mapping, Set, Tuple +from collections.abc import Mapping from ..api import get_filtered_properties_mapping, get_filtered_relations_df from ..struct import Reference, TypeDef @@ -33,7 +31,7 @@ def get_chebi_smiles_id_mapping() -> Mapping[str, str]: has_role = TypeDef(reference=Reference(prefix="chebi", identifier="has_role")) -def get_chebi_role_to_children() -> Mapping[str, Set[Tuple[str, str]]]: +def get_chebi_role_to_children() -> Mapping[str, set[tuple[str, str]]]: """Get the ChEBI role to children mapping.""" df = get_filtered_relations_df("chebi", relation=has_role) return 
multisetdict((role_id, ("chebi", chemical_id)) for chemical_id, _, role_id in df.values) diff --git a/src/pyobo/sources/chembl.py b/src/pyobo/sources/chembl.py index e9191390..f7d78782 100644 --- a/src/pyobo/sources/chembl.py +++ b/src/pyobo/sources/chembl.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - """Converter for ChEMBL. Run with ``python -m pyobo.sources.chembl -vv``. """ import logging +from collections.abc import Iterable from contextlib import closing -from typing import Iterable import chembl_downloader diff --git a/src/pyobo/sources/civic_gene.py b/src/pyobo/sources/civic_gene.py index 2df55c2b..0873e6b2 100644 --- a/src/pyobo/sources/civic_gene.py +++ b/src/pyobo/sources/civic_gene.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for CiVIC Genes.""" -from typing import Iterable, Optional +from collections.abc import Iterable +from typing import Optional import pandas as pd diff --git a/src/pyobo/sources/complexportal.py b/src/pyobo/sources/complexportal.py index 8512a0eb..ce1fffa6 100644 --- a/src/pyobo/sources/complexportal.py +++ b/src/pyobo/sources/complexportal.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for ComplexPortal.""" import logging -from typing import Iterable, List, Tuple +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm @@ -52,7 +50,7 @@ } -def _parse_members(s) -> List[Tuple[Reference, str]]: +def _parse_members(s) -> list[tuple[Reference, str]]: if pd.isna(s): return [] @@ -68,7 +66,7 @@ def _parse_members(s) -> List[Tuple[Reference, str]]: return rv -def _parse_xrefs(s) -> List[Tuple[Reference, str]]: +def _parse_xrefs(s) -> list[tuple[Reference, str]]: if pd.isna(s): return [] diff --git a/src/pyobo/sources/conso.py b/src/pyobo/sources/conso.py index dd672832..be4e40f6 100644 --- a/src/pyobo/sources/conso.py +++ b/src/pyobo/sources/conso.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Converter for CONSO.""" -from typing import Iterable, List +from collections.abc import Iterable import pandas as pd @@ -68,7 +66,7 @@ def iter_terms() -> Iterable[Term]: for _, row in terms_df.iterrows(): if row["Name"] == "WITHDRAWN": continue - provenance: List[Reference] = [] + provenance: list[Reference] = [] for curie in row["References"].split(","): curie = curie.strip() if not curie: diff --git a/src/pyobo/sources/cpt.py b/src/pyobo/sources/cpt.py index 59c9eb52..3384cb89 100644 --- a/src/pyobo/sources/cpt.py +++ b/src/pyobo/sources/cpt.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Converter for CPT.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/credit.py b/src/pyobo/sources/credit.py index e19e5a24..efdad674 100644 --- a/src/pyobo/sources/credit.py +++ b/src/pyobo/sources/credit.py @@ -3,7 +3,7 @@ from __future__ import annotations import json -from typing import Iterable +from collections.abc import Iterable from more_itertools import chunked diff --git a/src/pyobo/sources/cvx.py b/src/pyobo/sources/cvx.py index d48a16e5..7e0d9e19 100644 --- a/src/pyobo/sources/cvx.py +++ b/src/pyobo/sources/cvx.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for CVX.""" from collections import defaultdict -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/depmap.py b/src/pyobo/sources/depmap.py index 2e8ad5e9..579f493d 100644 --- a/src/pyobo/sources/depmap.py +++ b/src/pyobo/sources/depmap.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- - """DepMap cell lines.""" -from 
typing import Iterable, Optional +from collections.abc import Iterable +from typing import Optional import pandas as pd import pystow @@ -113,7 +112,7 @@ def ensure(version: str, force: bool = False) -> pd.DataFrame: url=get_url(version=version), name="sample_info.tsv", force=force, - read_csv_kwargs=dict(sep=",", dtype=str), + read_csv_kwargs={"sep": ",", "dtype": str}, ) diff --git a/src/pyobo/sources/dictybase_gene.py b/src/pyobo/sources/dictybase_gene.py index 8d72780a..01b4e379 100644 --- a/src/pyobo/sources/dictybase_gene.py +++ b/src/pyobo/sources/dictybase_gene.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - """Converter for dictyBase gene. Note that normal dictybase identifiers are for sequences """ import logging -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/drugbank.py b/src/pyobo/sources/drugbank.py index 5938aef5..5553ef0e 100644 --- a/src/pyobo/sources/drugbank.py +++ b/src/pyobo/sources/drugbank.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Convert DrugBank to OBO. Run with ``python -m pyobo.sources.drugbank`` @@ -8,14 +6,15 @@ import datetime import itertools as itt import logging +from collections.abc import Iterable, Mapping from functools import lru_cache -from typing import Any, Dict, Iterable, Mapping, Optional +from typing import Any, Optional from xml.etree import ElementTree import pystow from tqdm.auto import tqdm -from ..getters import NoBuild +from ..getters import NoBuildError from ..struct import Obo, Reference, Term from ..struct.typedef import has_inchi, has_salt, has_smiles from ..utils.cache import cached_pickle @@ -139,7 +138,7 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term: return term -@lru_cache() +@lru_cache def get_xml_root(version: Optional[str] = None) -> ElementTree.Element: """Get the DrugBank XML parser root. @@ -152,7 +151,7 @@ def get_xml_root(version: Optional[str] = None) -> ElementTree.Element: username = pystow.get_config("pyobo", "drugbank_username", raise_on_missing=True) password = pystow.get_config("pyobo", "drugbank_password", raise_on_missing=True) except ConfigError as e: - raise NoBuild from e + raise NoBuildError from e element = parse_drugbank(version=version, username=username, password=password) return element.getroot() @@ -167,7 +166,7 @@ def get_xml_root(version: Optional[str] = None) -> ElementTree.Element: def _extract_drug_info(drug_xml: ElementTree.Element) -> Mapping[str, Any]: """Extract information from an XML element representing a drug.""" # assert drug_xml.tag == f'{ns}drug' - row: Dict[str, Any] = { + row: dict[str, Any] = { "type": drug_xml.get("type"), "drugbank_id": drug_xml.findtext(f"{ns}drugbank-id[@primary='true']"), "cas": drug_xml.findtext(f"{ns}cas-number"), diff --git a/src/pyobo/sources/drugbank_salt.py b/src/pyobo/sources/drugbank_salt.py index fd6f8d8a..cdce63b4 100644 --- a/src/pyobo/sources/drugbank_salt.py +++ b/src/pyobo/sources/drugbank_salt.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Convert DrugBank Salts to OBO. Run with ``python -m pyobo.sources.drugbank_salt``
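A note on the ``-@lru_cache()`` / ``+@lru_cache`` change in drugbank.py above: since Python 3.8, ``functools.lru_cache`` may be applied as a bare decorator, so the empty-parentheses form is redundant. A minimal sketch of the pattern (the ``fib`` function is illustrative, not part of PyOBO):

.. code-block:: python

    from functools import lru_cache

    @lru_cache  # equivalent to @lru_cache() on Python 3.8+
    def fib(n: int) -> int:
        """Return the n-th Fibonacci number, memoized across calls."""
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    assert fib(10) == 55

@@ -10,11 +8,12 @@ ..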
code-block:: python import pyobo - df = pyobo.get_filtered_relations_df('drugbank', 'obo:has_salt') + + df = pyobo.get_filtered_relations_df("drugbank", "obo:has_salt") """ import logging -from typing import Iterable +from collections.abc import Iterable from .drugbank import iterate_drug_info from ..struct import Obo, Reference, Term diff --git a/src/pyobo/sources/drugcentral.py b/src/pyobo/sources/drugcentral.py index 19fe0fb1..9140ff73 100644 --- a/src/pyobo/sources/drugcentral.py +++ b/src/pyobo/sources/drugcentral.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- - """Get DrugCentral as OBO.""" import logging from collections import defaultdict +from collections.abc import Iterable from contextlib import closing -from typing import DefaultDict, Iterable, List import bioregistry import psycopg2 @@ -25,9 +23,9 @@ HOST = "unmtid-dbs.net" PORT = 5433 USER = "drugman" -PASSWORD = "dosage" +PASSWORD = "dosage" # noqa:S105 DBNAME = "drugcentral" -PARAMS = dict(dbname=DBNAME, user=USER, password=PASSWORD, host=HOST, port=PORT) +PARAMS = {"dbname": DBNAME, "user": USER, "password": PASSWORD, "host": HOST, "port": PORT} class DrugCentralGetter(Obo): @@ -58,7 +56,7 @@ def iter_terms() -> Iterable[Term]: with closing(conn.cursor()) as cur: cur.execute("SELECT struct_id, id_type, identifier FROM public.identifier") rows = cur.fetchall() - xrefs: DefaultDict[str, List[Reference]] = defaultdict(list) + xrefs: defaultdict[str, list[Reference]] = defaultdict(list) for drugcentral_id, prefix, identifier in tqdm( rows, unit_scale=True, desc="loading xrefs" ): @@ -76,7 +74,7 @@ def iter_terms() -> Iterable[Term]: ) with closing(conn.cursor()) as cur: cur.execute("SELECT id, name FROM public.synonyms") - synonyms: DefaultDict[str, List[Synonym]] = defaultdict(list) + synonyms: defaultdict[str, list[Synonym]] = defaultdict(list) for drugcentral_id, synonym in cur.fetchall(): synonyms[str(drugcentral_id)].append(Synonym(name=synonym)) diff --git a/src/pyobo/sources/expasy.py b/src/pyobo/sources/expasy.py index acd7b028..3164f7ec 100644 --- a/src/pyobo/sources/expasy.py +++ b/src/pyobo/sources/expasy.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Convert ExPASy to OBO.""" import logging from collections import defaultdict -from typing import Any, Dict, Iterable, Mapping, Optional, Set, Tuple +from collections.abc import Iterable, Mapping +from typing import Any, Optional from .utils import get_go_mapping from ..struct import Obo, Reference, Synonym, Term @@ -76,7 +75,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: with open(tree_path) as file: tree = get_tree(file) - terms: Dict[str, Term] = {} + terms: dict[str, Term] = {} child_to_parents = defaultdict(list) for ec_code, data in tree.items(): terms[ec_code] = Term( @@ -176,7 +175,7 @@ def normalize_expasy_id(expasy_id: str) -> str: return expasy_id.replace(" ", "") -def give_edge(unnormalized_ec_code: str) -> Tuple[int, Optional[str], str]: +def give_edge(unnormalized_ec_code: str) -> tuple[int, Optional[str], str]: """Return a (parent, child) tuple for given id.""" levels = [x for x in unnormalized_ec_code.replace(" ", "").replace("-", "").split(".") if x] level = len(levels) @@ -227,7 +226,7 @@ def get_database(lines: Iterable[str]) -> Mapping: for groups in _group_by_id(lines): _, expasy_id = groups[0] - ec_data_entry: Dict[str, Any] = { + ec_data_entry: dict[str, Any] = { "concept": { "namespace": PREFIX, "identifier": expasy_id, @@ -269,11 +268,11 @@ def get_database(lines: Iterable[str]) -> Mapping: continue uniprot_id, 
uniprot_accession = uniprot_entry.split(",") ec_data_entry["proteins"].append( # type:ignore - dict( - namespace="uniprot", - name=uniprot_accession, - identifier=uniprot_id, - ) + { + "namespace": "uniprot", + "name": uniprot_accession, + "identifier": uniprot_id, + } ) rv[expasy_id] = ec_data_entry @@ -300,7 +299,7 @@ def _group_by_id(lines): return groups -def get_ec2go(version: str) -> Mapping[str, Set[Tuple[str, str]]]: +def get_ec2go(version: str) -> Mapping[str, set[tuple[str, str]]]: """Get the EC mapping to GO activities.""" url = "http://current.geneontology.org/ontology/external2go/ec2go" path = ensure_path(PREFIX, url=url, name="ec2go.tsv", version=version) diff --git a/src/pyobo/sources/famplex.py b/src/pyobo/sources/famplex.py index d0441815..0450f9ce 100644 --- a/src/pyobo/sources/famplex.py +++ b/src/pyobo/sources/famplex.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for FamPlex.""" import logging from collections import defaultdict -from typing import Iterable, List, Mapping, Tuple +from collections.abc import Iterable, Mapping import bioregistry from pystow.utils import get_commit @@ -62,7 +60,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: dtype=str, force=force, ) - id_to_definition: Mapping[str, Tuple[str, str]] = { + id_to_definition: Mapping[str, tuple[str, str]] = { identifier: (definition, provenance) for identifier, provenance, definition in definitions_df.values } @@ -140,7 +138,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: yield term -def _get_xref_df(version: str) -> Mapping[str, List[Reference]]: +def _get_xref_df(version: str) -> Mapping[str, list[Reference]]: base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}" xrefs_url = f"{base_url}/equivalences.csv" xrefs_df = ensure_df(PREFIX, url=xrefs_url, version=version, header=None, sep=",", dtype=str) diff --git a/src/pyobo/sources/flybase.py b/src/pyobo/sources/flybase.py index 645aafc4..62695b48 100644 --- a/src/pyobo/sources/flybase.py +++ b/src/pyobo/sources/flybase.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for FlyBase Genes.""" import logging -from typing import Iterable, Mapping, Set +from collections.abc import Iterable, Mapping import pandas as pd from tqdm.auto import tqdm @@ -68,7 +66,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]: return dict(df.values) -def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, Set[str]]: +def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]: url = ( f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/" f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz" diff --git a/src/pyobo/sources/geonames.py b/src/pyobo/sources/geonames.py index 1b77bef4..77d7ce9a 100644 --- a/src/pyobo/sources/geonames.py +++ b/src/pyobo/sources/geonames.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging -from typing import Collection, Iterable, Mapping +from collections.abc import Collection, Iterable, Mapping import pandas as pd from pystow.utils import read_zipfile_csv diff --git a/src/pyobo/sources/gmt_utils.py b/src/pyobo/sources/gmt_utils.py index b203e69f..62223abb 100644 --- a/src/pyobo/sources/gmt_utils.py +++ b/src/pyobo/sources/gmt_utils.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """GMT utilities.""" +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Set, Tuple, Union +from typing import Union -GMTSummary = 
Tuple[str, str, Set[str]] -WikiPathwaysGMTSummary = Tuple[str, str, str, str, str, Set[str]] +GMTSummary = tuple[str, str, set[str]] +WikiPathwaysGMTSummary = tuple[str, str, str, str, str, set[str]] def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]: @@ -20,7 +19,7 @@ def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]: yield _process_line(line) -def _process_line(line: str) -> Tuple[str, str, Set[str]]: +def _process_line(line: str) -> tuple[str, str, set[str]]: """Return the pathway name, url, and gene sets associated. :param line: gmt file line diff --git a/src/pyobo/sources/go.py b/src/pyobo/sources/go.py index 4e195958..5befb008 100644 --- a/src/pyobo/sources/go.py +++ b/src/pyobo/sources/go.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Gene Ontology.""" from pyobo import get_descendants @@ -14,13 +12,13 @@ def is_biological_process(identifier: str) -> bool: """Return if the given GO identifier is a biological process. - >>> is_biological_process('0006915') + >>> is_biological_process("0006915") True - >>> is_biological_process('GO:0006915') + >>> is_biological_process("GO:0006915") True - >>> is_molecular_function('0006915') + >>> is_molecular_function("0006915") False - >>> is_cellular_component('0006915') + >>> is_cellular_component("0006915") False """ return _is_descendant(identifier, "0008150") diff --git a/src/pyobo/sources/gwascentral_phenotype.py b/src/pyobo/sources/gwascentral_phenotype.py index 327d178b..f7801f67 100644 --- a/src/pyobo/sources/gwascentral_phenotype.py +++ b/src/pyobo/sources/gwascentral_phenotype.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for GWAS Central Phenotypes.""" import json -from typing import Iterable +from collections.abc import Iterable from tqdm.auto import tqdm, trange diff --git a/src/pyobo/sources/gwascentral_study.py b/src/pyobo/sources/gwascentral_study.py index ad8002ed..b11aedfb 100644 --- a/src/pyobo/sources/gwascentral_study.py +++ b/src/pyobo/sources/gwascentral_study.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Converter for GWAS Central.""" import logging import tarfile -from typing import Iterable, Optional +from collections.abc import Iterable +from typing import Optional from xml.etree import ElementTree from pyobo.struct import Obo, Reference, Term, has_part diff --git a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py index d27430f2..ade2e949 100644 --- a/src/pyobo/sources/hgnc.py +++ b/src/pyobo/sources/hgnc.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Converter for HGNC.""" import itertools as itt @@ -7,8 +5,9 @@ import logging import typing from collections import Counter, defaultdict +from collections.abc import Iterable from operator import attrgetter -from typing import DefaultDict, Dict, Iterable, Optional +from typing import Optional from tabulate import tabulate from tqdm.auto import tqdm @@ -238,12 +237,12 @@ def get_obo(*, force: bool = False) -> Obo: return HGNCGetter(force=force) -def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: # noqa:C901 +def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]: """Get HGNC terms.""" if version is None: version = get_version("hgnc") unhandled_entry_keys: typing.Counter[str] = Counter() - unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict) + unhandle_locus_types: defaultdict[str, dict[str, Term]] = defaultdict(dict) path = ensure_path( PREFIX, url=DEFINITIONS_URL_FMT.format(version=version), @@ -459,8 +458,8 @@ def 
get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te headers=["hgnc_id", "name", "obsolete", "link", "provenance"], tablefmt="github", ) - print(f"## {k} ({len(v)})", file=file) # noqa: T201 - print(t, "\n", file=file) # noqa: T201 + print(f"## {k} ({len(v)})", file=file) + print(t, "\n", file=file) unhandle_locus_type_counter = Counter( {locus_type: len(d) for locus_type, d in unhandle_locus_types.items()} ) diff --git a/src/pyobo/sources/hgncgenefamily.py b/src/pyobo/sources/hgncgenefamily.py index b9e0ca8b..5b736468 100644 --- a/src/pyobo/sources/hgncgenefamily.py +++ b/src/pyobo/sources/hgncgenefamily.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for HGNC Gene Families.""" from collections import defaultdict -from typing import Iterable, List, Mapping +from collections.abc import Iterable, Mapping import pandas as pd @@ -50,7 +48,7 @@ def get_obo(force: bool = False) -> Obo: return HGNCGroupGetter(force=force) -def get_hierarchy(force: bool = False) -> Mapping[str, List[str]]: +def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]: """Get the HGNC Gene Families hierarchy as a dictionary.""" path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force) df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str}) diff --git a/src/pyobo/sources/icd10.py b/src/pyobo/sources/icd10.py index 428d1e60..69564d29 100644 --- a/src/pyobo/sources/icd10.py +++ b/src/pyobo/sources/icd10.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """Convert ICD-10 to OBO. Run with python -m pyobo.sources.icd10 -v """ import logging -from typing import Any, Iterable, Mapping, Set +from collections.abc import Iterable, Mapping +from typing import Any import click from more_click import verbose_option @@ -57,7 +56,7 @@ def iter_terms() -> Iterable[Term]: chapter_urls = res_json["child"] tqdm.write(f"there are {len(chapter_urls)} chapters") - visited_identifiers: Set[str] = set() + visited_identifiers: set[str] = set() for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json): yield from visiter( identifier, diff --git a/src/pyobo/sources/icd11.py b/src/pyobo/sources/icd11.py index 28eb6232..c6f28b7a 100644 --- a/src/pyobo/sources/icd11.py +++ b/src/pyobo/sources/icd11.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Convert ICD11 to OBO. Run with python -m pyobo.sources.icd11 -v @@ -8,7 +6,8 @@ import json import logging import os -from typing import Any, Iterable, Mapping, Set +from collections.abc import Iterable, Mapping +from typing import Any import click from more_click import verbose_option @@ -67,7 +66,7 @@ def iterate_icd11() -> Iterable[Term]: tqdm.write(f'There are {len(res_json["child"])} top level entities') - visited_identifiers: Set[str] = set() + visited_identifiers: set[str] = set() for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json): yield from visiter( identifier, diff --git a/src/pyobo/sources/icd_utils.py b/src/pyobo/sources/icd_utils.py index 74d52dd5..4c3a7577 100644 --- a/src/pyobo/sources/icd_utils.py +++ b/src/pyobo/sources/icd_utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Utilities for interacting with the ICD API. Want to get your own API client ID and client secret?
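The ``typing.Set``/``Dict``/``DefaultDict`` removals in the hunks above and below follow PEP 585: on Python 3.9+, builtin and ``collections`` classes can be subscripted directly, so the ``typing`` aliases are no longer needed. A minimal sketch of the pattern (names are illustrative, not PyOBO's API):

.. code-block:: python

    from collections import Counter, defaultdict

    # Builtin generics replace typing.Set, typing.List, typing.DefaultDict, etc.
    visited: set[str] = set()
    index: defaultdict[str, list[str]] = defaultdict(list)
    index["hgnc"].append("5")
    counts: Counter[str] = Counter({key: len(values) for key, values in index.items()})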
@@ -11,8 +9,9 @@ import datetime import json import os +from collections.abc import Iterable, Mapping from pathlib import Path -from typing import Any, Callable, Iterable, List, Mapping, Set, Union +from typing import Any, Callable, Union import pystow import requests @@ -20,7 +19,7 @@ from pystow.config_api import ConfigError from tqdm.auto import tqdm -from ..getters import NoBuild +from ..getters import NoBuildError from ..struct import Term TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token" # noqa:S105 @@ -43,7 +42,7 @@ def _get_entity(endpoint: str, identifier: str): return res.json() -def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> List[str]: +def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]: """Get the child identifiers.""" return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])] @@ -55,7 +54,7 @@ def get_icd_api_headers() -> Mapping[str, str]: icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True) icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True) except ConfigError as e: - raise NoBuild from e + raise NoBuildError from e grant_type = "client_credentials" body_params = {"grant_type": grant_type} @@ -73,7 +72,7 @@ def visiter( identifier: str, - visited_identifiers: Set[str], + visited_identifiers: set[str], directory: Union[str, Path], *, endpoint: str, diff --git a/src/pyobo/sources/interpro.py b/src/pyobo/sources/interpro.py index 4e9037b9..17f540c3 100644 --- a/src/pyobo/sources/interpro.py +++ b/src/pyobo/sources/interpro.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for InterPro.""" from collections import defaultdict -from typing import DefaultDict, Iterable, List, Mapping, Set, Tuple +from collections.abc import Iterable, Mapping from .utils import get_go_mapping from ..struct import Obo, Reference, Term @@ -82,7 +80,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) -> yield term -def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, Set[Tuple[str, str]]]: +def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, set[tuple[str, str]]]: """Get InterPro to Gene Ontology molecular function mapping.""" url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/interpro2go" path = ensure_path(PREFIX, url=url, name="interpro2go.tsv", version=version, force=force) @@ -98,7 +96,7 @@ def get_interpro_tree(version: str, force: bool = False): def _parse_tree_helper(lines: Iterable[str]): - rv1: DefaultDict[str, List[str]] = defaultdict(list) + rv1: defaultdict[str, list[str]] = defaultdict(list) previous_depth, previous_id = 0, "" stack = [previous_id] diff --git a/src/pyobo/sources/itis.py b/src/pyobo/sources/itis.py index 362b9cb6..1d8b4b99 100644 --- a/src/pyobo/sources/itis.py +++ b/src/pyobo/sources/itis.py @@ -1,13 +1,11 @@ -# -*- coding: utf-8 -*- - """Converter for the Integrated Taxonomic Information System (ITIS).""" import os import shutil import sqlite3 import zipfile +from collections.abc import Iterable from contextlib import closing -from typing import Iterable from pyobo.struct import Obo, Reference, Term from pyobo.utils.io import multidict
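The ``NoBuild`` to ``NoBuildError`` rename above follows the convention (enforced by linters as pep8-naming N818) that exception class names end in ``Error``, and ``raise ... from e`` preserves the triggering exception in the traceback. A standalone sketch of the idiom, not PyOBO's actual implementation:

.. code-block:: python

    class NoBuildError(RuntimeError):
        """Raised when a resource can't be built, e.g., because credentials are missing."""

    def get_secret(config: dict[str, str], key: str) -> str:
        try:
            return config[key]
        except KeyError as e:
            # chaining with "from e" keeps the original KeyError visible in the traceback
            raise NoBuildError(f"missing configuration: {key}") from e

diff --git a/src/pyobo/sources/kegg/__init__.py b/src/pyobo/sources/kegg/__init__.py index 27c026a3..25a398f3 100644 --- a/src/pyobo/sources/kegg/__init__.py +++ b/src/pyobo/sources/kegg/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: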
utf-8 -*- - """KEGG Databases.""" from .genes import KEGGGeneGetter diff --git a/src/pyobo/sources/kegg/api.py b/src/pyobo/sources/kegg/api.py index d2d006f0..89536c7b 100644 --- a/src/pyobo/sources/kegg/api.py +++ b/src/pyobo/sources/kegg/api.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """API utilities for KEGG.""" import urllib.error +from collections.abc import Mapping from dataclasses import dataclass -from typing import Mapping, Optional +from typing import Optional from pyobo import Reference, Term, ensure_path from pyobo.struct import from_species @@ -132,7 +131,7 @@ def _ensure_conv_genome_helper( version=version, ) with path_rv.open("w") as file: - print(file=file) # noqa: T201 + print(file=file) return path_rv.as_posix() except FileNotFoundError: return None diff --git a/src/pyobo/sources/kegg/genes.py b/src/pyobo/sources/kegg/genes.py index d4591d69..4707a51c 100644 --- a/src/pyobo/sources/kegg/genes.py +++ b/src/pyobo/sources/kegg/genes.py @@ -1,12 +1,11 @@ -# -*- coding: utf-8 -*- - """Convert KEGG Genes to OBO. Run with ``python -m pyobo.sources.kegg.genes`` """ import logging -from typing import Iterable, Optional +from collections.abc import Iterable +from typing import Optional import click from more_click import verbose_option @@ -90,7 +89,7 @@ def _make_terms( ) continue if ";" in line: - *_extras, name = [part.strip() for part in extras.split(";")] + *_extras, name = (part.strip() for part in extras.split(";")) else: name = extras diff --git a/src/pyobo/sources/kegg/genome.py b/src/pyobo/sources/kegg/genome.py index a7bb1c49..af4c7805 100644 --- a/src/pyobo/sources/kegg/genome.py +++ b/src/pyobo/sources/kegg/genome.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - """Convert KEGG Genome to OBO. Run with ``python -m pyobo.sources.kegg.genome`` """ import logging -from typing import Iterable +from collections.abc import Iterable from tqdm.auto import tqdm diff --git a/src/pyobo/sources/kegg/pathway.py b/src/pyobo/sources/kegg/pathway.py index ecc9ea2f..0bb38b6c 100644 --- a/src/pyobo/sources/kegg/pathway.py +++ b/src/pyobo/sources/kegg/pathway.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Convert KEGG Pathways to OBO. 
Run with ``python -m pyobo.sources.kegg.pathway`` @@ -8,8 +6,9 @@ import logging import urllib.error from collections import defaultdict +from collections.abc import Iterable, Mapping from functools import partial -from typing import Iterable, List, Mapping, Tuple, Union +from typing import Union from tqdm.auto import tqdm from tqdm.contrib.concurrent import thread_map @@ -76,7 +75,7 @@ def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]: ) -def _get_link_pathway_map(path: str) -> Mapping[str, List[str]]: +def _get_link_pathway_map(path: str) -> Mapping[str, list[str]]: rv = defaultdict(list) with open(path) as file: for line in file: @@ -110,7 +109,7 @@ def _iter_genome_terms( list_pathway_lines = [line.strip() for line in file] for line in list_pathway_lines: line = line.strip() - pathway_id, name = [part.strip() for part in line.split("\t")] + pathway_id, name = (part.strip() for part in line.split("\t")) pathway_id = pathway_id[len("path:") :] terms[pathway_id] = term = Term.from_triple( @@ -149,7 +148,7 @@ def _iter_genome_terms( def iter_kegg_pathway_paths( version: str, skip_missing: bool = True -) -> Iterable[Union[Tuple[KEGGGenome, str, str], Tuple[None, None, None]]]: +) -> Iterable[Union[tuple[KEGGGenome, str, str], tuple[None, None, None]]]: """Get paths for the KEGG Pathway files.""" genomes = list(iter_kegg_genomes(version=version, desc="KEGG Pathways")) func = partial(_process_genome, version=version, skip_missing=skip_missing) diff --git a/src/pyobo/sources/mesh.py b/src/pyobo/sources/mesh.py index 7d5b81a2..4efc74dc 100644 --- a/src/pyobo/sources/mesh.py +++ b/src/pyobo/sources/mesh.py @@ -1,17 +1,16 @@ -# -*- coding: utf-8 -*- - """Parser for the MeSH descriptors.""" import datetime import itertools as itt import logging import re -from typing import Any, Collection, Dict, Iterable, List, Mapping, Optional, Set, Tuple +from collections.abc import Collection, Iterable, Mapping +from typing import Any, Optional from xml.etree.ElementTree import Element from tqdm.auto import tqdm -from pyobo.api.utils import get_version +from pyobo.api.utils import safe_get_version from pyobo.identifier_utils import standardize_ec from pyobo.struct import Obo, Reference, Synonym, Term from pyobo.utils.cache import cached_json, cached_mapping @@ -70,7 +69,7 @@ def _inner(): def get_terms(version: str, force: bool = False) -> Iterable[Term]: """Get MeSH OBO terms.""" - mesh_id_to_term: Dict[str, Term] = {} + mesh_id_to_term: dict[str, Term] = {} descriptors = ensure_mesh_descriptors(version=version, force=force) supplemental_records = ensure_mesh_supplemental_records(version=version, force=force) @@ -80,8 +79,8 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: name = entry["name"] definition = entry.get("scope_note") - xrefs: List[Reference] = [] - synonyms: Set[str] = set() + xrefs: list[Reference] = [] + synonyms: set[str] = set() for concept in entry["concepts"]: synonyms.add(concept["name"]) for term in concept["terms"]: @@ -107,7 +106,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: def ensure_mesh_descriptors( version: str, force: bool = False, force_process: bool = False -) -> List[Mapping[str, Any]]: +) -> list[Mapping[str, Any]]: """Get the parsed MeSH dictionary, and cache it if it wasn't already.""" @cached_json(path=prefix_directory_join(PREFIX, name="desc.json", version=version), force=force) @@ -133,7 +132,7 @@ def get_supplemental_url(version: str) -> str: return 
f"https://nlmpubs.nlm.nih.gov/projects/mesh/{version}/xmlmesh/supp{version}.gz" -def ensure_mesh_supplemental_records(version: str, force: bool = False) -> List[Mapping[str, Any]]: +def ensure_mesh_supplemental_records(version: str, force: bool = False) -> list[Mapping[str, Any]]: """Get the parsed MeSH dictionary, and cache it if it wasn't already.""" @cached_json(path=prefix_directory_join(PREFIX, name="supp.json", version=version), force=force) @@ -147,11 +146,11 @@ def _inner(): return _inner() -def get_descriptor_records(element: Element, id_key: str, name_key) -> List[Dict[str, Any]]: +def get_descriptor_records(element: Element, id_key: str, name_key) -> list[dict[str, Any]]: """Get MeSH descriptor records.""" logger.info("extract MeSH descriptors, concepts, and terms") - rv: List[Dict[str, Any]] = [ + rv: list[dict[str, Any]] = [ get_descriptor_record(descriptor, id_key=id_key, name_key=name_key) for descriptor in tqdm(element, desc="Getting MeSH Descriptors", unit_scale=True) ] @@ -204,7 +203,7 @@ def get_descriptor_record( element: Element, id_key: str, name_key: str, -) -> Dict[str, Any]: +) -> dict[str, Any]: """Get descriptor records from the main element. :param element: An XML element @@ -228,13 +227,13 @@ def get_descriptor_record( return rv -def get_concept_records(element: Element) -> List[Mapping[str, Any]]: +def get_concept_records(element: Element) -> list[Mapping[str, Any]]: """Get concepts from a record.""" return [get_concept_record(e) for e in element.findall("ConceptList/Concept")] -def _get_xrefs(element: Element) -> List[Tuple[str, str]]: - raw_registry_numbers: List[str] = sorted( +def _get_xrefs(element: Element) -> list[tuple[str, str]]: + raw_registry_numbers: list[str] = sorted( {e.text for e in element.findall("RelatedRegistryNumberList/RegistryNumber") if e.text} ) registry_number = element.findtext("RegistryNumber") @@ -267,7 +266,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]: if scope_note is not None: scope_note = scope_note.replace("\\n", "\n").strip() - rv: Dict[str, Any] = { + rv: dict[str, Any] = { "concept_ui": element.findtext("ConceptUI"), "name": element.findtext("ConceptName/String"), "terms": get_term_records(element), @@ -286,7 +285,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]: return rv -def get_term_records(element: Element) -> List[Mapping[str, Any]]: +def get_term_records(element: Element) -> list[Mapping[str, Any]]: """Get all of the terms for a concept.""" return [get_term_record(term) for term in element.findall("TermList/Term")] @@ -307,7 +306,7 @@ def _text_or_bust(element: Element, name: str) -> str: return n -def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]: +def _get_descriptor_qualifiers(descriptor: Element) -> list[Mapping[str, str]]: return [ { "qualifier_ui": _text_or_bust(qualifier, "QualifierUI"), @@ -321,7 +320,7 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]: def get_mesh_category_curies( letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None -) -> List[str]: +) -> list[str]: """Get the MeSH LUIDs for a category, by letter (e.g., "A"). :param letter: The MeSH tree, A for anatomy, C for disease, etc. @@ -332,8 +331,7 @@ def get_mesh_category_curies( .. 
seealso:: https://meshb.nlm.nih.gov/treeView """ if version is None: - version = get_version("mesh") - assert version is not None + version = safe_get_version("mesh") tree_to_mesh = get_tree_to_mesh_id(version=version) rv = [] for i in range(1, 100): diff --git a/src/pyobo/sources/mgi.py b/src/pyobo/sources/mgi.py index 99767e52..ddbd4616 100644 --- a/src/pyobo/sources/mgi.py +++ b/src/pyobo/sources/mgi.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for MGI.""" import logging from collections import defaultdict -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/mirbase.py b/src/pyobo/sources/mirbase.py index 14117c0c..9dbf96d7 100644 --- a/src/pyobo/sources/mirbase.py +++ b/src/pyobo/sources/mirbase.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for miRBase.""" import gzip import logging -from typing import Iterable, List, Mapping +from collections.abc import Iterable, Mapping from tqdm.auto import tqdm @@ -48,7 +46,7 @@ def get_obo(force: bool = False) -> Obo: return MiRBaseGetter(force=force) -def get_terms(version: str, force: bool = False) -> List[Term]: +def get_terms(version: str, force: bool = False) -> list[Term]: """Parse miRNA data from filepath and convert it to dictionary.""" _assert_frozen_version(version) url = f"{BASE_URL}/miRNA.dat.gz" @@ -77,7 +75,7 @@ def _prepare_organisms(version: str, force: bool = False): return {division: (taxonomy_id, name) for _, division, name, _tree, taxonomy_id in df.values} -def _prepare_aliases(version: str, force: bool = False) -> Mapping[str, List[str]]: +def _prepare_aliases(version: str, force: bool = False) -> Mapping[str, list[str]]: _assert_frozen_version(version) url = f"{BASE_URL}/aliases.txt.gz" df = ensure_df(PREFIX, url=url, sep="\t", version=version, force=force) @@ -94,7 +92,7 @@ def _process_definitions_lines( organisms = _prepare_organisms(version, force=force) aliases = _prepare_aliases(version, force=force) - groups: List[List[str]] = [] + groups: list[list[str]] = [] for line in lines: # TODO replace with itertools.groupby if line.startswith("ID"): diff --git a/src/pyobo/sources/mirbase_constants.py b/src/pyobo/sources/mirbase_constants.py index b8ca8796..d4dd8e91 100644 --- a/src/pyobo/sources/mirbase_constants.py +++ b/src/pyobo/sources/mirbase_constants.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Constants for miRBase.""" import pandas as pd diff --git a/src/pyobo/sources/mirbase_family.py b/src/pyobo/sources/mirbase_family.py index 5669159d..148889ed 100644 --- a/src/pyobo/sources/mirbase_family.py +++ b/src/pyobo/sources/mirbase_family.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Converter for miRBase Families.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/mirbase_mature.py b/src/pyobo/sources/mirbase_mature.py index c815a19d..3c0c90e4 100644 --- a/src/pyobo/sources/mirbase_mature.py +++ b/src/pyobo/sources/mirbase_mature.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Converter for miRBase Mature.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/msigdb.py b/src/pyobo/sources/msigdb.py index 9a8aec89..d6500b1e 100644 --- a/src/pyobo/sources/msigdb.py +++ b/src/pyobo/sources/msigdb.py @@ -1,11 +1,10 @@ -# -*- coding: utf-8 -*- - """Parsers for MSig.""" import logging -from typing import Iterable, 
Optional +from collections.abc import Iterable +from typing import Optional from xml.etree import ElementTree from tqdm.auto import tqdm from ..struct import Obo, Reference, Term, has_participant @@ -137,7 +136,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]: def _get_definition(attrib) -> Optional[str]: rv = attrib["DESCRIPTION_FULL"].strip() or attrib["DESCRIPTION_BRIEF"].strip() or None if rv is not None: - return rv.replace(r"\d", "").replace(r"\s", "") + return rv.replace(r"\d", "").replace(r"\s", "") return None diff --git a/src/pyobo/sources/ncbigene.py b/src/pyobo/sources/ncbigene.py index c38db9db..5c97b93d 100644 --- a/src/pyobo/sources/ncbigene.py +++ b/src/pyobo/sources/ncbigene.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for Entrez.""" import logging -from typing import Iterable, List, Mapping, Set +from collections.abc import Iterable, Mapping import bioregistry import pandas as pd @@ -47,7 +45,7 @@ ] -def get_ncbigene_ids() -> Set[str]: +def get_ncbigene_ids() -> set[str]: """Get the Entrez name mapping.""" df = _get_ncbigene_subset(["GeneID"]) return set(df["GeneID"]) @@ -68,7 +66,7 @@ def _get_ncbigene_info_subset(usecols) -> Mapping[str, str]: return dict(df.values) -def _get_ncbigene_subset(usecols: List[str]) -> pd.DataFrame: +def _get_ncbigene_subset(usecols: list[str]) -> pd.DataFrame: df = ensure_df( PREFIX, url=GENE_INFO_URL, diff --git a/src/pyobo/sources/npass.py b/src/pyobo/sources/npass.py index 0a629019..5ca79458 100644 --- a/src/pyobo/sources/npass.py +++ b/src/pyobo/sources/npass.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for NPASS.""" import logging -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/omim_ps.py b/src/pyobo/sources/omim_ps.py index b809a235..f729ef51 100644 --- a/src/pyobo/sources/omim_ps.py +++ b/src/pyobo/sources/omim_ps.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Converter for OMIM Phenotypic Series.""" import logging -from typing import Iterable +from collections.abc import Iterable from bioversions.utils import get_soup diff --git a/src/pyobo/sources/pathbank.py b/src/pyobo/sources/pathbank.py index 869938a1..d33ee575 100644 --- a/src/pyobo/sources/pathbank.py +++ b/src/pyobo/sources/pathbank.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for PathBank.""" import logging from collections import defaultdict -from typing import Iterable, Mapping, Set +from collections.abc import Iterable, Mapping import pandas as pd from tqdm.auto import tqdm @@ -98,7 +96,7 @@ def get_proteins_df(version: str, force: bool = False) -> pd.DataFrame: return proteins_df -def get_protein_mapping(version: str, force: bool = False) -> Mapping[str, Set[Reference]]: +def get_protein_mapping(version: str, force: bool = False) -> Mapping[str, set[Reference]]: """Make the protein mapping.""" proteins_df = get_proteins_df(version=version, force=force) smpdb_id_to_proteins = defaultdict(set) @@ -122,7 +120,7 @@ def get_metabolite_df(version: str, force: bool = False) -> pd.DataFrame: ) -def get_metabolite_mapping(version: str, force: bool = False) -> Mapping[str, Set[Reference]]: +def get_metabolite_mapping(version: str, force: bool = False) -> Mapping[str, set[Reference]]: """Make the metabolite mapping.""" metabolites_df = get_metabolite_df(version=version, force=force) smpdb_id_to_metabolites = defaultdict(set)
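On the msigdb.py import above: this hunk keeps the standard library's ``from xml.etree import ElementTree`` rather than swapping in ``lxml.etree``, since the surrounding code uses ``ElementTree`` as a module (for annotations like ``ElementTree.Element`` and module-level parsing functions), while ``lxml.etree.ElementTree`` is a factory function without that API. A quick check of the stdlib behavior assumed here (the ``GENESET`` tag mirrors MSigDB's XML but is illustrative):

.. code-block:: python

    from xml.etree import ElementTree

    # Module-level API: fromstring()/parse() return Element/ElementTree objects.
    root = ElementTree.fromstring('<GENESET STANDARD_NAME="demo"/>')
    assert isinstance(root, ElementTree.Element)
    assert root.get("STANDARD_NAME") == "demo"

diff --git a/src/pyobo/sources/pfam.py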
b/src/pyobo/sources/pfam.py index 4df02096..cd47d6bd 100644 --- a/src/pyobo/sources/pfam.py +++ b/src/pyobo/sources/pfam.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Convert PFAM to OBO.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/pfam_clan.py b/src/pyobo/sources/pfam_clan.py index 096a835e..0b818ce5 100644 --- a/src/pyobo/sources/pfam_clan.py +++ b/src/pyobo/sources/pfam_clan.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Convert PFAM Clans to OBO.""" -from typing import Iterable +from collections.abc import Iterable from tqdm.auto import tqdm diff --git a/src/pyobo/sources/pid.py b/src/pyobo/sources/pid.py index 92494a65..db8613c6 100644 --- a/src/pyobo/sources/pid.py +++ b/src/pyobo/sources/pid.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for NCI PID.""" import logging from collections import defaultdict -from typing import Iterable, List, Mapping, Tuple +from collections.abc import Iterable, Mapping import pandas as pd @@ -45,7 +43,7 @@ def get_obo() -> Obo: return PIDGetter() -def iter_networks(use_tqdm: bool = False, force: bool = False) -> Iterable[Tuple[str, CX]]: +def iter_networks(use_tqdm: bool = False, force: bool = False) -> Iterable[tuple[str, CX]]: """Iterate over NCI PID networks.""" yield from ensure_ndex_network_set( PREFIX, NDEX_NETWORK_SET_UUID, use_tqdm=use_tqdm, force=force @@ -117,7 +115,7 @@ def get_curation_df() -> pd.DataFrame: return df[["Text from NDEx", "Type", "Namespace", "Identifier"]] -def get_remapping() -> Mapping[str, List[Tuple[str, str]]]: +def get_remapping() -> Mapping[str, list[tuple[str, str]]]: """Get a mapping from text to list of HGNC id/symbols.""" curation_df = get_curation_df() rv = defaultdict(list) diff --git a/src/pyobo/sources/pombase.py b/src/pyobo/sources/pombase.py index 290fa44a..ce28d119 100644 --- a/src/pyobo/sources/pombase.py +++ b/src/pyobo/sources/pombase.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for PomBase.""" import logging from collections import defaultdict -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/pubchem.py b/src/pyobo/sources/pubchem.py index 6c91ca08..df6db5d7 100644 --- a/src/pyobo/sources/pubchem.py +++ b/src/pyobo/sources/pubchem.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for PubChem Compound.""" import logging -from typing import Iterable, Mapping, Optional +from collections.abc import Iterable, Mapping +from typing import Optional import pandas as pd from bioregistry.utils import removeprefix diff --git a/src/pyobo/sources/reactome.py b/src/pyobo/sources/reactome.py index 9757f7bb..1d09904d 100644 --- a/src/pyobo/sources/reactome.py +++ b/src/pyobo/sources/reactome.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- - """Converter for Reactome.""" import logging from collections import defaultdict +from collections.abc import Iterable, Mapping from functools import lru_cache -from typing import Iterable, Mapping, Set import pandas as pd from tqdm.auto import tqdm @@ -122,7 +120,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]: @lru_cache(maxsize=1) -def get_protein_to_pathways() -> Mapping[str, Set[str]]: +def get_protein_to_pathways() -> Mapping[str, set[str]]: """Get a mapping from proteins to the pathways they're in.""" protein_to_pathways = defaultdict(set) x = get_id_multirelations_mapping("reactome", has_participant) diff --git a/src/pyobo/sources/rgd.py 
b/src/pyobo/sources/rgd.py index d1871257..ca68215b 100644 --- a/src/pyobo/sources/rgd.py +++ b/src/pyobo/sources/rgd.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for RGD.""" import logging -from typing import Iterable, Optional +from collections.abc import Iterable +from typing import Optional import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/rhea.py b/src/pyobo/sources/rhea.py index 412ef8c2..145cf0a9 100644 --- a/src/pyobo/sources/rhea.py +++ b/src/pyobo/sources/rhea.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for Rhea.""" import logging -from typing import TYPE_CHECKING, Dict, Iterable, Optional +from collections.abc import Iterable +from typing import TYPE_CHECKING, Optional import pystow @@ -71,7 +70,7 @@ def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdfl version, url=RHEA_RDF_GZ_URL, force=force, - parse_kwargs=dict(format="xml"), + parse_kwargs={"format": "xml"}, ) @@ -103,10 +102,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]: ) names = {str(identifier): str(name) for _, identifier, name in result} - terms: Dict[str, Term] = {} - master_to_left: Dict[str, str] = {} - master_to_right: Dict[str, str] = {} - master_to_bi: Dict[str, str] = {} + terms: dict[str, Term] = {} + master_to_left: dict[str, str] = {} + master_to_right: dict[str, str] = {} + master_to_bi: dict[str, str] = {} directions = ensure_df( PREFIX, diff --git a/src/pyobo/sources/ror.py b/src/pyobo/sources/ror.py index 7c8f3a0b..65539486 100644 --- a/src/pyobo/sources/ror.py +++ b/src/pyobo/sources/ror.py @@ -4,7 +4,8 @@ import json import zipfile -from typing import Any, Iterable +from collections.abc import Iterable +from typing import Any import bioregistry import zenodo_client @@ -62,7 +63,7 @@ class RORGetter(Obo): "rdfs": "http://www.w3.org/2000/01/rdf-schema#", } - def __post_init__(self): # noqa: D105 + def __post_init__(self): self.data_version, _url, _path = _get_info() super().__post_init__() diff --git a/src/pyobo/sources/selventa/__init__.py b/src/pyobo/sources/selventa/__init__.py index 571b3831..7c402032 100644 --- a/src/pyobo/sources/selventa/__init__.py +++ b/src/pyobo/sources/selventa/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Importers for selventa terminologies.""" from .schem import SCHEMGetter diff --git a/src/pyobo/sources/selventa/schem.py b/src/pyobo/sources/selventa/schem.py index b4ce2124..af4945b2 100644 --- a/src/pyobo/sources/selventa/schem.py +++ b/src/pyobo/sources/selventa/schem.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- - """Selventa chemicals. .. seealso:: https://github.com/pyobo/pyobo/issues/27 """ -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/selventa/scomp.py b/src/pyobo/sources/selventa/scomp.py index 408a2b51..fdccd8e1 100644 --- a/src/pyobo/sources/selventa/scomp.py +++ b/src/pyobo/sources/selventa/scomp.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Selventa complexes.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/selventa/sdis.py b/src/pyobo/sources/selventa/sdis.py index bc6c2800..39673f0d 100644 --- a/src/pyobo/sources/selventa/sdis.py +++ b/src/pyobo/sources/selventa/sdis.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- - """Selventa diseases. .. 
seealso:: https://github.com/pyobo/pyobo/issues/26 """ -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/selventa/sfam.py b/src/pyobo/sources/selventa/sfam.py index cb909eac..52185ac8 100644 --- a/src/pyobo/sources/selventa/sfam.py +++ b/src/pyobo/sources/selventa/sfam.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Selventa families.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/sources/sgd.py b/src/pyobo/sources/sgd.py index 6a45cecc..a9ccb620 100644 --- a/src/pyobo/sources/sgd.py +++ b/src/pyobo/sources/sgd.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Converter for SGD.""" -from typing import Iterable +from collections.abc import Iterable from urllib.parse import unquote_plus from ..struct import Obo, Reference, Synonym, Term, from_species diff --git a/src/pyobo/sources/slm.py b/src/pyobo/sources/slm.py index e14a01f8..95369bb6 100644 --- a/src/pyobo/sources/slm.py +++ b/src/pyobo/sources/slm.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Swisslipids.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd from tqdm.auto import tqdm diff --git a/src/pyobo/sources/umls/__init__.py b/src/pyobo/sources/umls/__init__.py index e952d0a4..cebc592e 100644 --- a/src/pyobo/sources/umls/__init__.py +++ b/src/pyobo/sources/umls/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Converter for UMLS.""" from .umls import UMLSGetter, get_obo # noqa: F401 diff --git a/src/pyobo/sources/umls/__main__.py b/src/pyobo/sources/umls/__main__.py index ba6056d9..e90bd9ac 100644 --- a/src/pyobo/sources/umls/__main__.py +++ b/src/pyobo/sources/umls/__main__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """CLI for UMLS exporter.""" from .umls import UMLSGetter diff --git a/src/pyobo/sources/umls/get_synonym_types.py b/src/pyobo/sources/umls/get_synonym_types.py index a65e57a9..d1da0e85 100644 --- a/src/pyobo/sources/umls/get_synonym_types.py +++ b/src/pyobo/sources/umls/get_synonym_types.py @@ -1,7 +1,7 @@ """Utilities for UMLS synonyms.""" +from collections.abc import Mapping from pathlib import Path -from typing import Mapping import requests from bs4 import BeautifulSoup diff --git a/src/pyobo/sources/umls/umls.py b/src/pyobo/sources/umls/umls.py index bedc6cc2..0a3563b7 100644 --- a/src/pyobo/sources/umls/umls.py +++ b/src/pyobo/sources/umls/umls.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Converter for UMLS. 
Run with ``python -m pyobo.sources.umls`` @@ -8,7 +6,7 @@ import itertools as itt import operator from collections import defaultdict -from typing import Iterable, Mapping, Set +from collections.abc import Iterable, Mapping import bioregistry import pandas as pd @@ -67,7 +65,7 @@ def get_obo() -> Obo: return UMLSGetter() -def get_semantic_types() -> Mapping[str, Set[str]]: +def get_semantic_types() -> Mapping[str, set[str]]: """Get UMLS semantic types for each term.""" dd = defaultdict(set) with open_umls_semantic_types() as file: diff --git a/src/pyobo/sources/uniprot/__init__.py b/src/pyobo/sources/uniprot/__init__.py index a8721597..53761a18 100644 --- a/src/pyobo/sources/uniprot/__init__.py +++ b/src/pyobo/sources/uniprot/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Converters for UniProt resources.""" from .uniprot import PREFIX, UniProtGetter diff --git a/src/pyobo/sources/uniprot/uniprot.py b/src/pyobo/sources/uniprot/uniprot.py index e1a5a808..135f6a4c 100644 --- a/src/pyobo/sources/uniprot/uniprot.py +++ b/src/pyobo/sources/uniprot/uniprot.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Converter for UniProt.""" +from collections.abc import Iterable from operator import attrgetter from pathlib import Path -from typing import Iterable, List, Optional, cast +from typing import Optional, cast from tqdm.auto import tqdm @@ -155,7 +154,7 @@ def iter_terms(version: Optional[str] = None) -> Iterable[Term]: yield term -def _parse_go(go_terms) -> List[Reference]: +def _parse_go(go_terms) -> list[Reference]: rv = [] if go_terms: for go_term in go_terms.split(";"): diff --git a/src/pyobo/sources/uniprot/uniprot_ptm.py b/src/pyobo/sources/uniprot/uniprot_ptm.py index 6dd44009..438625a3 100644 --- a/src/pyobo/sources/uniprot/uniprot_ptm.py +++ b/src/pyobo/sources/uniprot/uniprot_ptm.py @@ -27,7 +27,8 @@ import itertools as itt from collections import defaultdict -from typing import DefaultDict, Iterable, List, Mapping, Optional, Tuple +from collections.abc import Iterable, Mapping +from typing import Optional from tqdm.auto import tqdm @@ -63,18 +64,18 @@ def iter_terms(force: bool = False) -> Iterable[Term]: path = ensure_path(PREFIX, url=URL, force=force) with open(path) as file: lines = list(file) - it: Iterable[Tuple[str, str]] = ((line[:2], line[2:].strip()) for line in lines[47:-5]) + it: Iterable[tuple[str, str]] = ((line[:2], line[2:].strip()) for line in lines[47:-5]) for i, (_, term_lines) in enumerate(itt.groupby(it, key=lambda p: p[0] == "//")): term = _parse(i, term_lines) if term: yield term -def _parse(i, lines: Iterable[Tuple[str, str]]) -> Optional[Term]: - dd_: DefaultDict[str, List[str]] = defaultdict(list) +def _parse(i, lines: Iterable[tuple[str, str]]) -> Optional[Term]: + dd_: defaultdict[str, list[str]] = defaultdict(list) for key, value in lines: dd_[key].append(value) - dd: Mapping[str, List[str]] = dict(dd_) + dd: Mapping[str, list[str]] = dict(dd_) if "//" in dd: return None diff --git a/src/pyobo/sources/utils.py b/src/pyobo/sources/utils.py index 7d4838ee..60a80d20 100644 --- a/src/pyobo/sources/utils.py +++ b/src/pyobo/sources/utils.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """Utilities for converters.""" import logging -from typing import Mapping, Set, Tuple +from collections.abc import Mapping from ..utils.io import multisetdict @@ -15,7 +13,7 @@ logger = logging.getLogger(__name__) -def get_go_mapping(path: str, prefix: str) -> Mapping[str, Set[Tuple[str, str]]]: +def get_go_mapping(path: str, prefix: str) -> Mapping[str, set[tuple[str, 
str]]]: """Get a GO mapping file.""" with open(path) as file: return multisetdict( @@ -23,7 +21,7 @@ def get_go_mapping(path: str, prefix: str) -> Mapping[str, Set[Tuple[str, str]]] ) -def process_go_mapping_line(line: str, prefix: str) -> Tuple[str, Tuple[str, str]]: +def process_go_mapping_line(line: str, prefix: str) -> tuple[str, tuple[str, str]]: """Process a GO mapping line.""" line1 = line[len(f"{prefix}:") :] line2, go_id = line1.rsplit(";", 1) diff --git a/src/pyobo/sources/wikipathways.py b/src/pyobo/sources/wikipathways.py index 0b1ff898..99f0ecfd 100644 --- a/src/pyobo/sources/wikipathways.py +++ b/src/pyobo/sources/wikipathways.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Converter for WikiPathways.""" import logging import urllib.error -from typing import Iterable +from collections.abc import Iterable from .gmt_utils import parse_wikipathways_gmt from ..constants import SPECIES_REMAPPING diff --git a/src/pyobo/sources/zfin.py b/src/pyobo/sources/zfin.py index 33d7692f..91682ff1 100644 --- a/src/pyobo/sources/zfin.py +++ b/src/pyobo/sources/zfin.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Converter for ZFIN.""" import logging from collections import defaultdict -from typing import Iterable, Optional +from collections.abc import Iterable +from typing import Optional from tqdm.auto import tqdm diff --git a/src/pyobo/ssg/__init__.py b/src/pyobo/ssg/__init__.py index 36895455..12132249 100644 --- a/src/pyobo/ssg/__init__.py +++ b/src/pyobo/ssg/__init__.py @@ -2,9 +2,10 @@ import itertools as itt from collections import defaultdict +from collections.abc import Sequence from operator import attrgetter from pathlib import Path -from typing import Optional, Sequence, Tuple, Union +from typing import Optional, Union import bioregistry from bioregistry.constants import BIOREGISTRY_DEFAULT_BASE_URL @@ -37,7 +38,7 @@ def make_site( metaregistry_metaprefix: Optional[str] = None, metaregistry_name: Optional[str] = None, metaregistry_base_url: Optional[str] = None, - show_properties_in_manifest: Optional[Sequence[Tuple[str, str]]] = None, + show_properties_in_manifest: Optional[Sequence[tuple[str, str]]] = None, ) -> None: """Make a website in the given directory. diff --git a/src/pyobo/struct/__init__.py b/src/pyobo/struct/__init__.py index da325438..1660e8ac 100644 --- a/src/pyobo/struct/__init__.py +++ b/src/pyobo/struct/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Data structures for OBO.""" from .reference import Reference # noqa: F401 diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index ab1b3cbc..e0229838 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Data structures for OBO.""" -from typing import Optional, Tuple +from typing import Optional import bioregistry import curies @@ -124,7 +122,7 @@ def _materialize( def _escaped_identifier(self): return obo_escape(self.identifier) - def __str__(self): # noqa: D105 + def __str__(self): identifier_lower = self.identifier.lower() if identifier_lower.startswith(f"{self.prefix.lower()}:"): rv = identifier_lower @@ -134,7 +132,7 @@ def __str__(self): # noqa: D105 rv = f"{rv} ! 
{self.name}" return rv - def __hash__(self): # noqa: D105 + def __hash__(self): return hash((self.__class__, self.prefix, self.identifier)) @@ -145,32 +143,32 @@ class Referenced: @property def prefix(self): - """The prefix of the typedef.""" # noqa: D401 + """The prefix of the typedef.""" return self.reference.prefix @property def name(self): - """The name of the typedef.""" # noqa: D401 + """The name of the typedef.""" return self.reference.name @property def identifier(self) -> str: - """The local unique identifier for this typedef.""" # noqa: D401 + """The local unique identifier for this typedef.""" return self.reference.identifier @property def curie(self) -> str: - """The CURIE for this typedef.""" # noqa: D401 + """The CURIE for this typedef.""" return self.reference.curie @property def preferred_curie(self) -> str: - """The preferred CURIE for this typedef.""" # noqa: D401 + """The preferred CURIE for this typedef.""" return self.reference.preferred_curie @property - def pair(self) -> Tuple[str, str]: - """The pair of namespace/identifier.""" # noqa: D401 + def pair(self) -> tuple[str, str]: + """The pair of namespace/identifier.""" return self.reference.pair @property diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index 72b401e1..ea16fabb 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Data structures for OBO.""" import gzip @@ -7,6 +5,7 @@ import logging import os from collections import defaultdict +from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence from dataclasses import dataclass, field from datetime import datetime from operator import attrgetter @@ -16,17 +15,8 @@ Any, Callable, ClassVar, - Collection, - Dict, - Iterable, - Iterator, - List, - Mapping, Optional, - Sequence, - Set, TextIO, - Tuple, Union, ) @@ -104,7 +94,7 @@ class Synonym: ) #: References to articles where the synonym appears - provenance: List[Reference] = field(default_factory=list) + provenance: list[Reference] = field(default_factory=list) def to_obo(self) -> str: """Write this synonym as an OBO line to appear in a [Term] stanza.""" @@ -168,7 +158,7 @@ def from_text( acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym")) -ReferenceHint = Union[Reference, "Term", Tuple[str, str], str] +ReferenceHint = Union[Reference, "Term", tuple[str, str], str] def _ensure_ref(reference: ReferenceHint) -> Reference: @@ -199,26 +189,26 @@ class Term(Referenced): definition: Optional[str] = None #: References to articles in which the term appears - provenance: List[Reference] = field(default_factory=list) + provenance: list[Reference] = field(default_factory=list) #: Relationships defined by [Typedef] stanzas - relationships: Dict[TypeDef, List[Reference]] = field(default_factory=lambda: defaultdict(list)) + relationships: dict[TypeDef, list[Reference]] = field(default_factory=lambda: defaultdict(list)) #: Properties, which are not defined with Typedef and have scalar values instead of references. 
- properties: Dict[str, List[str]] = field(default_factory=lambda: defaultdict(list)) + properties: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list)) #: Relationships with the default "is_a" - parents: List[Reference] = field(default_factory=list) + parents: list[Reference] = field(default_factory=list) #: Synonyms of this term - synonyms: List[Synonym] = field(default_factory=list) + synonyms: list[Synonym] = field(default_factory=list) #: Equivalent references - xrefs: List[Reference] = field(default_factory=list) - xref_types: List[Reference] = field(default_factory=list) + xrefs: list[Reference] = field(default_factory=list) + xref_types: list[Reference] = field(default_factory=list) #: Alternate Identifiers - alt_ids: List[Reference] = field(default_factory=list) + alt_ids: list[Reference] = field(default_factory=list) #: The sub-namespace within the ontology namespace: Optional[str] = None @@ -228,7 +218,7 @@ class Term(Referenced): type: Literal["Term", "Instance"] = "Term" - def __hash__(self): # noqa: D105 + def __hash__(self): return hash((self.__class__, self.prefix, self.identifier)) @classmethod @@ -321,7 +311,7 @@ def extend_parents(self, references: Collection[Reference]) -> None: raise ValueError("can not append a collection of parents containing a null parent") self.parents.extend(references) - def get_properties(self, prop) -> List[str]: + def get_properties(self, prop) -> list[str]: """Get properties from the given key.""" return self.properties[prop] @@ -343,7 +333,7 @@ def get_relationship(self, typedef: TypeDef) -> Optional[Reference]: raise ValueError return r[0] - def get_relationships(self, typedef: TypeDef) -> List[Reference]: + def get_relationships(self, typedef: TypeDef) -> list[Reference]: """Get relationships from the given type.""" return self.relationships[typedef] @@ -399,16 +389,17 @@ def append_property( self.properties[prop].append(value) def _definition_fp(self) -> str: - assert self.definition is not None + if self.definition is None: + raise AssertionError return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]' - def iterate_relations(self) -> Iterable[Tuple[TypeDef, Reference]]: + def iterate_relations(self) -> Iterable[tuple[TypeDef, Reference]]: """Iterate over pairs of typedefs and targets.""" for typedef, targets in sorted(self.relationships.items(), key=_sort_relations): for target in sorted(targets, key=lambda ref: ref.preferred_curie): yield typedef, target - def iterate_properties(self) -> Iterable[Tuple[str, str]]: + def iterate_properties(self) -> Iterable[tuple[str, str]]: """Iterate over pairs of property and values.""" for prop, values in sorted(self.properties.items()): for value in sorted(values): @@ -470,7 +461,7 @@ def _escape(s) -> str: #: A set of warnings, used to make sure we don't show the same one over and over -_TYPEDEF_WARNINGS: Set[Tuple[str, str]] = set() +_TYPEDEF_WARNINGS: set[tuple[str, str]] = set() def _sort_relations(r): @@ -489,6 +480,8 @@ def _sort_properties(r): class BioregistryError(ValueError): + """An error raised for non-canonical prefixes.""" + def __str__(self) -> str: return dedent( f""" @@ -518,10 +511,10 @@ class Obo: format_version: ClassVar[str] = "1.2" #: Type definitions - typedefs: ClassVar[Optional[List[TypeDef]]] = None + typedefs: ClassVar[Optional[list[TypeDef]]] = None #: Synonym type definitions - synonym_typedefs: ClassVar[Optional[List[SynonymTypeDef]]] = None + synonym_typedefs: ClassVar[Optional[list[SynonymTypeDef]]] = None #: An annotation 
about how an ontology was generated auto_generated_by: ClassVar[Optional[str]] = None @@ -541,7 +534,7 @@ class Obo: bioversions_key: ClassVar[Optional[str]] = None #: Root terms to use for the ontology - root_terms: ClassVar[Optional[List[Reference]]] = None + root_terms: ClassVar[Optional[list[Reference]]] = None #: The date the ontology was generated date: Optional[datetime] = field(default_factory=datetime.today) @@ -555,7 +548,7 @@ class Obo: #: The hierarchy of terms _hierarchy: Optional[nx.DiGraph] = field(init=False, default=None, repr=False) #: A cache of terms - _items: Optional[List[Term]] = field(init=False, default=None, repr=False) + _items: Optional[list[Term]] = field(init=False, default=None, repr=False) term_sort_key: ClassVar[Optional[Callable[["Obo", Term], int]]] = None @@ -590,7 +583,7 @@ def _get_version(self) -> Optional[str]: return get_version(self.bioversions_key) except KeyError: logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ") - except IOError: + except OSError: logger.warning(f"[{self.bioversions_key}] error while looking up version") return None @@ -717,7 +710,7 @@ def write_obo( @staticmethod def _write_lines(it, file: Optional[TextIO]): for line in it: - print(line, file=file) # noqa: T201 + print(line, file=file) def write_obonet_gz(self, path: Union[str, Path]) -> None: """Write the OBO to a gzipped dump in Obonet JSON.""" @@ -896,16 +889,16 @@ def _items_accessor(self): self._items = sorted(self.iter_terms(force=self.force), key=key) return self._items - def __iter__(self) -> Iterator["Term"]: # noqa: D105 + def __iter__(self) -> Iterator["Term"]: if self.iter_only: return iter(self.iter_terms(force=self.force)) return iter(self._items_accessor) - def ancestors(self, identifier: str) -> Set[str]: + def ancestors(self, identifier: str) -> set[str]: """Return a set of identifiers for parents of the given identifier.""" return nx.descendants(self.hierarchy, identifier) # note this is backwards - def descendants(self, identifier: str) -> Set[str]: + def descendants(self, identifier: str) -> set[str]: """Return a set of identifiers for the children of the given identifier.""" return nx.ancestors(self.hierarchy, identifier) # note this is backwards @@ -915,11 +908,12 @@ def is_descendant(self, descendant: str, ancestor: str) -> bool: .. code-block:: python from pyobo import get_obo - obo = get_obo('go') - interleukin_10_complex = '1905571' # interleukin-10 receptor complex - all_complexes = '0032991' - assert obo.is_descendant('1905571', '0032991') + obo = get_obo("go") + + interleukin_10_complex = "1905571" # interleukin-10 receptor complex + all_complexes = "0032991" + assert obo.is_descendant("1905571", "0032991") """ return ancestor in self.ancestors(descendant) @@ -932,11 +926,12 @@ def hierarchy(self) -> nx.DiGraph: .. 
code-block:: python from pyobo import get_obo - obo = get_obo('go') - identifier = '1905571' # interleukin-10 receptor complex - is_complex = '0032991' in nx.descendants(obo.hierarchy, identifier) # should be true - """ # noqa:D401 + obo = get_obo("go") + + identifier = "1905571" # interleukin-10 receptor complex + is_complex = "0032991" in nx.descendants(obo.hierarchy, identifier) # should be true + """ if self._hierarchy is None: self._hierarchy = nx.DiGraph() for term in self._iter_terms(desc=f"[{self.ontology}] getting hierarchy"): @@ -1007,10 +1002,10 @@ def to_obonet(self: "Obo", *, use_tqdm: bool = False) -> nx.MultiDiGraph: def get_metadata(self) -> Mapping[str, Any]: """Get metadata.""" - return dict( - version=self.data_version, - date=self.date and self.date.isoformat(), - ) + return { + "version": self.data_version, + "date": self.date and self.date.isoformat(), + } def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]: """Iterate over identifiers.""" @@ -1018,11 +1013,11 @@ def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]: if term.prefix == self.ontology: yield term.identifier - def get_ids(self, *, use_tqdm: bool = False) -> Set[str]: + def get_ids(self, *, use_tqdm: bool = False) -> set[str]: """Get the set of identifiers.""" return set(self.iterate_ids(use_tqdm=use_tqdm)) - def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]: + def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]: """Iterate identifier name pairs.""" for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"): if term.prefix == self.ontology and term.name: @@ -1032,19 +1027,23 @@ def get_id_name_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]: """Get a mapping from identifiers to names.""" return dict(self.iterate_id_name(use_tqdm=use_tqdm)) - def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]: + def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]: """Iterate over pairs of terms' identifiers and their respective definitions.""" for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"): if term.identifier and term.definition: - yield term.identifier, term.definition.strip('"').replace("\n", " ").replace( - "\t", " " - ).replace(" ", " ") + yield ( + term.identifier, + term.definition.strip('"') + .replace("\n", " ") + .replace("\t", " ") + .replace(" ", " "), + ) def get_id_definition_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]: """Get a mapping from identifiers to definitions.""" return dict(self.iterate_id_definition(use_tqdm=use_tqdm)) - def get_obsolete(self, *, use_tqdm: bool = False) -> Set[str]: + def get_obsolete(self, *, use_tqdm: bool = False) -> set[str]: """Get the set of obsolete identifiers.""" return { term.identifier @@ -1060,7 +1059,7 @@ def get_obsolete(self, *, use_tqdm: bool = False) -> Set[str]: def iterate_id_species( self, *, prefix: Optional[str] = None, use_tqdm: bool = False - ) -> Iterable[Tuple[str, str]]: + ) -> Iterable[tuple[str, str]]: """Iterate over terms' identifiers and respective species (if available).""" if prefix is None: prefix = NCBITAXON_PREFIX @@ -1087,7 +1086,7 @@ def get_typedef_df(self, use_tqdm: bool = False) -> pd.DataFrame: ] return pd.DataFrame(rows, columns=["prefix", "identifier", "name"]) - def iter_typedef_id_name(self) -> Iterable[Tuple[str, str]]: + def iter_typedef_id_name(self) -> Iterable[tuple[str, 
str]]: """Iterate over typedefs' identifiers and their respective names.""" for typedef in self.typedefs or []: yield typedef.identifier, typedef.name @@ -1100,7 +1099,7 @@ def get_typedef_id_name_mapping(self) -> Mapping[str, str]: # PROPS # ######### - def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, str, str]]: + def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, str, str]]: """Iterate over tuples of terms, properties, and their values.""" # TODO if property_prefix is set, try removing that as a prefix from all prop strings. for term in self._iter_terms( @@ -1111,10 +1110,10 @@ def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, @property def properties_header(self): - """Property dataframe header.""" # noqa:D401 + """Property dataframe header.""" return [f"{self.ontology}_id", "property", "value"] - def iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str, str]]: + def iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]: """Iterate property rows.""" for term, prop, value in self.iterate_properties(use_tqdm=use_tqdm): yield term.identifier, prop, value @@ -1128,7 +1127,7 @@ def get_properties_df(self, *, use_tqdm: bool = False) -> pd.DataFrame: def iterate_filtered_properties( self, prop: str, *, use_tqdm: bool = False - ) -> Iterable[Tuple[Term, str]]: + ) -> Iterable[tuple[Term, str]]: """Iterate over tuples of terms and the values for the given property.""" for term in self._iter_terms(use_tqdm=use_tqdm): for _prop, value in term.iterate_properties(): @@ -1156,7 +1155,7 @@ def get_filtered_properties_mapping( def get_filtered_properties_multimapping( self, prop: str, *, use_tqdm: bool = False - ) -> Mapping[str, List[str]]: + ) -> Mapping[str, list[str]]: """Get a mapping from a term's identifier to the property values.""" return multidict( (term.identifier, value) @@ -1169,7 +1168,7 @@ def get_filtered_properties_multimapping( def iterate_relations( self, *, use_tqdm: bool = False - ) -> Iterable[Tuple[Term, TypeDef, Reference]]: + ) -> Iterable[tuple[Term, TypeDef, Reference]]: """Iterate over tuples of terms, relations, and their targets.""" for term in self._iter_terms( use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting relations" @@ -1186,17 +1185,23 @@ def iterate_relations( def iter_relation_rows( self, use_tqdm: bool = False - ) -> Iterable[Tuple[str, str, str, str, str]]: + ) -> Iterable[tuple[str, str, str, str, str]]: """Iterate the relations' rows.""" for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm): - yield term.identifier, typedef.prefix, typedef.identifier, reference.prefix, reference.identifier + yield ( + term.identifier, + typedef.prefix, + typedef.identifier, + reference.prefix, + reference.identifier, + ) def iterate_filtered_relations( self, relation: RelationHint, *, use_tqdm: bool = False, - ) -> Iterable[Tuple[Term, Reference]]: + ) -> Iterable[tuple[Term, Reference]]: """Iterate over tuples of terms and their targets for the given relation.""" _target_prefix, _target_identifier = get_reference_tuple(relation) for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm): @@ -1205,7 +1210,7 @@ def iterate_filtered_relations( @property def relations_header(self) -> Sequence[str]: - """Header for the relations dataframe.""" # noqa:D401 + """Header for the relations dataframe.""" return [f"{self.ontology}_id", RELATION_PREFIX, RELATION_ID, TARGET_PREFIX, TARGET_ID] def
get_relations_df(self, *, use_tqdm: bool = False) -> pd.DataFrame: @@ -1236,7 +1241,7 @@ def iterate_filtered_relations_filtered_targets( target_prefix: str, *, use_tqdm: bool = False, - ) -> Iterable[Tuple[Term, Reference]]: + ) -> Iterable[tuple[Term, Reference]]: """Iterate over relationships between one identifier and another.""" for term, reference in self.iterate_filtered_relations( relation=relation, use_tqdm=use_tqdm @@ -1259,9 +1264,9 @@ def get_relation_mapping( >>> from pyobo.sources.hgnc import get_obo >>> obo = get_obo() - >>> human_mapt_hgnc_id = '6893' - >>> mouse_mapt_mgi_id = '97180' - >>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping('ro:HOM0000017', 'mgi') + >>> human_mapt_hgnc_id = "6893" + >>> mouse_mapt_mgi_id = "97180" + >>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping("ro:HOM0000017", "mgi") >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id] """ return { @@ -1285,9 +1290,9 @@ def get_relation( >>> from pyobo.sources.hgnc import get_obo >>> obo = get_obo() - >>> human_mapt_hgnc_id = '6893' - >>> mouse_mapt_mgi_id = '97180' - >>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, 'ro:HOM0000017', 'mgi') + >>> human_mapt_hgnc_id = "6893" + >>> mouse_mapt_mgi_id = "97180" + >>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, "ro:HOM0000017", "mgi") """ relation_mapping = self.get_relation_mapping( relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm @@ -1300,7 +1305,7 @@ def get_relation_multimapping( target_prefix: str, *, use_tqdm: bool = False, - ) -> Mapping[str, List[str]]: + ) -> Mapping[str, list[str]]: """Get a mapping from the term's identifier to the target's identifiers.""" return multidict( (term.identifier, reference.identifier) @@ -1316,7 +1321,7 @@ def get_id_multirelations_mapping( typedef: TypeDef, *, use_tqdm: bool = False, - ) -> Mapping[str, List[Reference]]: + ) -> Mapping[str, list[Reference]]: """Get a mapping from identifiers to a list of all references for the given relation.""" return multidict( (term.identifier, reference) @@ -1330,18 +1335,18 @@ def get_id_multirelations_mapping( # SYNONYMS # ############ - def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, Synonym]]: + def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, Synonym]]: """Iterate over pairs of term and synonym object.""" for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting synonyms"): for synonym in sorted(term.synonyms, key=attrgetter("name")): yield term, synonym - def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]: + def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]: """Iterate over pairs of identifier and synonym text.""" for term, synonym in self.iterate_synonyms(use_tqdm=use_tqdm): yield term.identifier, synonym.name - def get_id_synonyms_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, List[str]]: + def get_id_synonyms_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, list[str]]: """Get a mapping from identifiers to a list of sorted synonym strings.""" return multidict(self.iterate_synonym_rows(use_tqdm=use_tqdm)) @@ -1349,7 +1354,7 @@ def get_id_synonyms_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, Lis # XREFS # ######### - def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, Reference]]: + def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, Reference]]: 
"""Iterate over xrefs.""" for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting xrefs"): for xref in term.xrefs: @@ -1357,20 +1362,20 @@ def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, Refer def iterate_filtered_xrefs( self, prefix: str, *, use_tqdm: bool = False - ) -> Iterable[Tuple[Term, Reference]]: + ) -> Iterable[tuple[Term, Reference]]: """Iterate over xrefs to a given prefix.""" for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm): if xref.prefix == prefix: yield term, xref - def iterate_xref_rows(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str, str]]: + def iterate_xref_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]: """Iterate over terms' identifiers, xref prefixes, and xref identifiers.""" for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm): yield term.identifier, xref.prefix, xref.identifier @property def xrefs_header(self): - """The header for the xref dataframe.""" # noqa:D401 + """The header for the xref dataframe.""" return [f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID] def get_xrefs_df(self, *, use_tqdm: bool = False) -> pd.DataFrame: @@ -1391,7 +1396,7 @@ def get_filtered_xrefs_mapping( def get_filtered_multixrefs_mapping( self, prefix: str, *, use_tqdm: bool = False - ) -> Mapping[str, List[str]]: + ) -> Mapping[str, list[str]]: """Get filtered xrefs as a dictionary.""" return multidict( (term.identifier, xref.identifier) @@ -1402,18 +1407,18 @@ def get_filtered_multixrefs_mapping( # ALTS # ######## - def iterate_alts(self) -> Iterable[Tuple[Term, Reference]]: + def iterate_alts(self) -> Iterable[tuple[Term, Reference]]: """Iterate over alternative identifiers.""" for term in self: for alt in term.alt_ids: yield term, alt - def iterate_alt_rows(self) -> Iterable[Tuple[str, str]]: + def iterate_alt_rows(self) -> Iterable[tuple[str, str]]: """Iterate over pairs of terms' primary identifiers and alternate identifiers.""" for term, alt in self.iterate_alts(): yield term.identifier, alt.identifier - def get_id_alts_mapping(self) -> Mapping[str, List[str]]: + def get_id_alts_mapping(self) -> Mapping[str, list[str]]: """Get a mapping from identifiers to a list of alternative identifiers.""" return multidict((term.identifier, alt.identifier) for term, alt in self.iterate_alts()) @@ -1423,14 +1428,14 @@ def make_ad_hoc_ontology( _name: str, _auto_generated_by: Optional[str] = None, _format_version: str = "1.2", - _typedefs: Optional[List[TypeDef]] = None, - _synonym_typedefs: Optional[List[SynonymTypeDef]] = None, + _typedefs: Optional[list[TypeDef]] = None, + _synonym_typedefs: Optional[list[SynonymTypeDef]] = None, _date: Optional[datetime] = None, _data_version: Optional[str] = None, _idspaces: Optional[Mapping[str, str]] = None, - _root_terms: Optional[List[Reference]] = None, + _root_terms: Optional[list[Reference]] = None, *, - terms: List[Term], + terms: list[Term], ) -> "Obo": """Make an ad-hoc ontology.""" @@ -1457,7 +1462,7 @@ def iter_terms(self, force: bool = False) -> Iterable[Term]: return AdHocOntology() -def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> List[Mapping[str, Any]]: +def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> list[Mapping[str, Any]]: """Convert the type defs.""" if not typedefs: return [] @@ -1470,7 +1475,7 @@ def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]: return typedef.reference.model_dump() -def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> List[str]: +def 
_convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> list[str]: """Convert the synonym type defs.""" if not synonym_typedefs: return [] diff --git a/src/pyobo/struct/typedef.py b/src/pyobo/struct/typedef.py index 719d1ea7..7e253274 100644 --- a/src/pyobo/struct/typedef.py +++ b/src/pyobo/struct/typedef.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- - """Default typedefs, references, and other structures.""" +from collections.abc import Iterable from dataclasses import dataclass, field -from typing import Dict, Iterable, List, Optional, Tuple, Union +from typing import Optional, Union from .reference import Reference, Referenced from ..identifier_utils import normalize_curie @@ -71,11 +70,11 @@ class TypeDef(Referenced): is_symmetric: Optional[bool] = None domain: Optional[Reference] = None range: Optional[Reference] = None - parents: List[Reference] = field(default_factory=list) - xrefs: List[Reference] = field(default_factory=list) + parents: list[Reference] = field(default_factory=list) + xrefs: list[Reference] = field(default_factory=list) inverse: Optional[Reference] = None created_by: Optional[str] = None - holds_over_chain: Optional[List[Reference]] = None + holds_over_chain: Optional[list[Reference]] = None #: Whether this relationship is a metadata tag. Properties that are marked as metadata tags are #: used to record object metadata. Object metadata is additional information about an object #: that is useful to track, but does not impact the definition of the object or how it should @@ -83,7 +82,7 @@ class TypeDef(Referenced): #: structured notes about a term, for example. is_metadata_tag: Optional[bool] = None - def __hash__(self) -> int: # noqa: D105 + def __hash__(self) -> int: return hash((self.__class__, self.prefix, self.identifier)) def iterate_obo_lines(self) -> Iterable[str]: @@ -140,10 +139,10 @@ def from_curie(cls, curie: str, name: Optional[str] = None) -> "TypeDef": return cls.from_triple(prefix=prefix, identifier=identifier, name=name) -RelationHint = Union[Reference, TypeDef, Tuple[str, str], str] +RelationHint = Union[Reference, TypeDef, tuple[str, str], str] -def get_reference_tuple(relation: RelationHint) -> Tuple[str, str]: +def get_reference_tuple(relation: RelationHint) -> tuple[str, str]: """Get tuple for typedef/reference.""" if isinstance(relation, (Reference, TypeDef)): return relation.prefix, relation.identifier @@ -366,7 +365,7 @@ def get_reference_tuple(relation: RelationHint) -> Tuple[str, str]: reference=Reference(prefix="foaf", identifier="homepage", name="homepage"), is_metadata_tag=True ) -default_typedefs: Dict[Tuple[str, str], TypeDef] = { +default_typedefs: dict[tuple[str, str], TypeDef] = { v.pair: v for k, v in locals().items() if isinstance(v, TypeDef) } diff --git a/src/pyobo/struct/utils.py b/src/pyobo/struct/utils.py index 5afc8870..8e24cf1b 100644 --- a/src/pyobo/struct/utils.py +++ b/src/pyobo/struct/utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Utilities for data structures for OBO.""" __all__ = [ diff --git a/src/pyobo/utils/__init__.py b/src/pyobo/utils/__init__.py index 379da213..dc51750e 100644 --- a/src/pyobo/utils/__init__.py +++ b/src/pyobo/utils/__init__.py @@ -1,3 +1 @@ -# -*- coding: utf-8 -*- - """Utilities.""" diff --git a/src/pyobo/utils/cache.py b/src/pyobo/utils/cache.py index 6776b0ef..e0e93dda 100644 --- a/src/pyobo/utils/cache.py +++ b/src/pyobo/utils/cache.py @@ -1,13 +1,12 @@ -# -*- coding: utf-8 -*- - """Utilities for caching files.""" import gzip import json import logging import 
os +from collections.abc import Iterable, Mapping from pathlib import Path -from typing import Generic, Iterable, List, Mapping, TypeVar, Union +from typing import Generic, TypeVar, Union import networkx as nx from pystow.cache import Cached @@ -46,6 +45,7 @@ def __init__( use_tqdm: bool = False, force: bool = False, ): + """Initialize the mapping cache.""" super().__init__(path=path, force=force) self.header = header self.use_tqdm = use_tqdm @@ -55,9 +55,11 @@ class CachedMapping(_CachedMapping[Mapping[str, str]]): """A cache for simple mappings.""" def load(self) -> Mapping[str, str]: + """Load a TSV file.""" return open_map_tsv(self.path, use_tqdm=self.use_tqdm) def dump(self, rv: Mapping[str, str]) -> None: + """Write a TSV file.""" write_map_tsv(path=self.path, header=self.header, rv=rv) @@ -77,23 +79,28 @@ def write_gzipped_graph(graph: nx.MultiDiGraph, path: Union[str, Path]) -> None: class CachedGraph(Cached[nx.MultiDiGraph]): + """A cache for multidigraphs.""" def load(self) -> nx.MultiDiGraph: + """Load a graph file.""" return get_gzipped_graph(self.path) def dump(self, rv: nx.MultiDiGraph) -> None: + """Write a graph file.""" write_gzipped_graph(rv, self.path) cached_graph = CachedGraph -class CachedMultidict(_CachedMapping[Mapping[str, List[str]]]): +class CachedMultidict(_CachedMapping[Mapping[str, list[str]]]): """A cache for complex mappings.""" - def load(self) -> Mapping[str, List[str]]: + def load(self) -> Mapping[str, list[str]]: + """Load a TSV file representing a multimap.""" return open_multimap_tsv(self.path, use_tqdm=self.use_tqdm) - def dump(self, rv: Mapping[str, List[str]]) -> None: + def dump(self, rv: Mapping[str, list[str]]) -> None: + """Write a TSV file representing a multimap.""" write_multimap_tsv(path=self.path, header=self.header, rv=rv) diff --git a/src/pyobo/utils/io.py b/src/pyobo/utils/io.py index d9e0f1d5..0a1d976c 100644 --- a/src/pyobo/utils/io.py +++ b/src/pyobo/utils/io.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """I/O utilities.""" import collections.abc @@ -8,9 +6,10 @@ import logging import time from collections import defaultdict +from collections.abc import Iterable, Mapping from contextlib import contextmanager from pathlib import Path -from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple, TypeVar, Union +from typing import Optional, TypeVar, Union from xml.etree.ElementTree import Element import pandas as pd @@ -78,7 +77,7 @@ def open_multimap_tsv( *, use_tqdm: bool = False, has_header: bool = True, -) -> Mapping[str, List[str]]: +) -> Mapping[str, list[str]]: """Load a mapping TSV file that has multiple mappings for each.""" return multidict(_help_multimap_tsv(path=path, use_tqdm=use_tqdm, has_header=has_header)) @@ -88,7 +87,7 @@ def _help_multimap_tsv( *, use_tqdm: bool = False, has_header: bool = True, -) -> Iterable[Tuple[str, str]]: +) -> Iterable[tuple[str, str]]: with open(path) as file: if has_header: next(file) # throw away header @@ -97,7 +96,7 @@ def _help_multimap_tsv( yield from get_reader(file) -def multidict(pairs: Iterable[Tuple[X, Y]]) -> Mapping[X, List[Y]]: +def multidict(pairs: Iterable[tuple[X, Y]]) -> Mapping[X, list[Y]]: """Accumulate a multidict from a list of pairs.""" rv = defaultdict(list) for key, value in pairs: @@ -105,7 +104,7 @@ def multidict(pairs: Iterable[Tuple[X, Y]]) -> Mapping[X, List[Y]]: return dict(rv) -def multisetdict(pairs: Iterable[Tuple[X, Y]]) -> Dict[X, Set[Y]]: +def multisetdict(pairs: Iterable[tuple[X, Y]]) -> dict[X, set[Y]]: """Accumulate a multisetdict from a list 
of pairs.""" rv = defaultdict(set) for key, value in pairs: @@ -118,7 +117,7 @@ def write_map_tsv( *, path: Union[str, Path], header: Optional[Iterable[str]] = None, - rv: Union[Iterable[Tuple[str, str]], Mapping[str, str]], + rv: Union[Iterable[tuple[str, str]], Mapping[str, str]], sep: str = "\t", ) -> None: """Write a mapping dictionary to a TSV file.""" @@ -132,7 +131,7 @@ def write_multimap_tsv( *, path: Union[str, Path], header: Iterable[str], - rv: Mapping[str, List[str]], + rv: Mapping[str, list[str]], sep: str = "\t", ) -> None: """Write a multiple mapping dictionary to a TSV file.""" @@ -144,7 +143,7 @@ def write_iterable_tsv( *, path: Union[str, Path], header: Optional[Iterable[str]] = None, - it: Iterable[Tuple[str, ...]], + it: Iterable[tuple[str, ...]], sep: str = "\t", ) -> None: """Write a mapping dictionary to a TSV file.""" diff --git a/src/pyobo/utils/iter.py b/src/pyobo/utils/iter.py index 3c45c374..e40ca30f 100644 --- a/src/pyobo/utils/iter.py +++ b/src/pyobo/utils/iter.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Tools for iterating over things.""" import csv import gzip -from typing import Iterable, List, Tuple, TypeVar +from collections.abc import Iterable +from typing import TypeVar from more_itertools import peekable @@ -18,7 +17,7 @@ Y = TypeVar("Y") -def iterate_gzips_together(a_path, b_path) -> Iterable[Tuple[str, str, List[str]]]: +def iterate_gzips_together(a_path, b_path) -> Iterable[tuple[str, str, list[str]]]: """Iterate over two gzipped files together.""" with gzip.open(a_path, mode="rt", errors="ignore") as a, gzip.open(b_path, mode="rt") as b: a = csv.reader(a, delimiter="\t", quoting=csv.QUOTE_MINIMAL) @@ -27,8 +26,8 @@ def iterate_gzips_together(a_path, b_path) -> Iterable[Tuple[str, str, List[str] def iterate_together( - a: Iterable[Tuple[X, Y]], b: Iterable[Tuple[X, Z]] -) -> Iterable[Tuple[X, Y, List[Z]]]: + a: Iterable[tuple[X, Y]], b: Iterable[tuple[X, Z]] +) -> Iterable[tuple[X, Y, list[Z]]]: """Iterate over two sorted lists that have the same keys. 
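For example, a minimal sketch with hypothetical data (both inputs must already be sorted by their shared keys):

.. code-block:: python

    names = [("1", "A1BG"), ("2", "A2M")]
    synonyms = [("1", "ABG"), ("1", "GAB"), ("2", "A2MD")]

    for key, name, grouped in iterate_together(names, synonyms):
        print(key, name, grouped)

    # 1 A1BG ['ABG', 'GAB']
    # 2 A2M ['A2MD']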
The lists have to have the following invariants: diff --git a/src/pyobo/utils/misc.py b/src/pyobo/utils/misc.py index 85957d83..7d2b80bf 100644 --- a/src/pyobo/utils/misc.py +++ b/src/pyobo/utils/misc.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - """Miscellaneous utilities.""" import gzip import logging import os from datetime import datetime -from subprocess import check_output # noqa:S404 +from subprocess import check_output from typing import Optional __all__ = [ diff --git a/src/pyobo/utils/ndex_utils.py b/src/pyobo/utils/ndex_utils.py index 43475ef9..cc70a572 100644 --- a/src/pyobo/utils/ndex_utils.py +++ b/src/pyobo/utils/ndex_utils.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Utilities for caching files from NDEx.""" import json import os -from typing import Any, Iterable, List, Mapping, Tuple +from collections.abc import Iterable, Mapping +from typing import Any import requests from tqdm.auto import tqdm @@ -21,7 +20,7 @@ NDEX_BASE_URL = "http://public.ndexbio.org/v2" NETWORK_ENDPOINT = f"{NDEX_BASE_URL}/network" NETWORKSET_ENDPOINT = f"{NDEX_BASE_URL}/networkset" -CX = List[Mapping[str, Any]] +CX = list[Mapping[str, Any]] def iterate_aspect(cx: CX, aspect: str) -> Iterable[Any]: @@ -47,7 +46,7 @@ def ensure_ndex_network(prefix: str, uuid: str, force: bool = False) -> CX: def ensure_ndex_network_set( prefix: str, uuid: str, use_tqdm: bool = False, force: bool = False -) -> Iterable[Tuple[str, CX]]: +) -> Iterable[tuple[str, CX]]: """Ensure the list of networks that goes with NCI PID on NDEx.""" it = _help_ensure_ndex_network_set(prefix, uuid, force=force) if use_tqdm: @@ -56,7 +55,7 @@ def ensure_ndex_network_set( yield network_uuid, ensure_ndex_network(prefix, network_uuid, force=force) -def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -> List[str]: +def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) -> list[str]: """Ensure the list of networks that goes with NCI PID on NDEx.""" networkset_path = prefix_directory_join(prefix, name="networks.txt") if os.path.exists(networkset_path) and not force: @@ -69,5 +68,5 @@ def _help_ensure_ndex_network_set(prefix: str, uuid: str, force: bool = False) - network_uuids = res_json["networks"] with open(networkset_path, "w") as file: for network_uuid in sorted(network_uuids): - print(network_uuid, file=file) # noqa: T201 + print(network_uuid, file=file) return network_uuids diff --git a/src/pyobo/utils/path.py b/src/pyobo/utils/path.py index 4fac7643..713d7394 100644 --- a/src/pyobo/utils/path.py +++ b/src/pyobo/utils/path.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Utilities for building paths.""" import logging from pathlib import Path -from typing import Any, Callable, Dict, Literal, Optional, Union +from typing import Any, Callable, Literal, Optional, Union import pandas as pd import requests_ftp @@ -46,7 +44,8 @@ def prefix_directory_join( logger.info("[%s] got version %s", prefix, version) elif not isinstance(version, str): raise TypeError(f"Invalid type: {version} ({type(version)})") - assert version is not None + if version is None: + raise AssertionError version = cleanup_version(version, prefix=prefix) if version is not None and "/" in version: raise ValueError(f"[{prefix}] Can not have slash in version: {version}") @@ -78,7 +77,7 @@ def ensure_path( if not path.exists() and error_on_missing: raise FileNotFoundError - kwargs: Dict[str, Any] + kwargs: dict[str, Any] if verify: kwargs = {"backend": backend} else: diff --git a/src/pyobo/version.py 
b/src/pyobo/version.py index 1e941995..5003b550 100644 --- a/src/pyobo/version.py +++ b/src/pyobo/version.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - """Version information for PyOBO. Run with ``python -m pyobo.version`` """ import os -from subprocess import CalledProcessError, check_output # noqa: S404 +from subprocess import CalledProcessError, check_output __all__ = [ "VERSION", @@ -21,7 +19,7 @@ def get_git_hash() -> str: """Get the PyOBO git hash.""" with open(os.devnull, "w") as devnull: try: - ret = check_output( # noqa: S603,S607 + ret = check_output( ["git", "rev-parse", "HEAD"], cwd=os.path.dirname(__file__), stderr=devnull, diff --git a/src/pyobo/xrefdb/__init__.py b/src/pyobo/xrefdb/__init__.py index dd83c914..053ed6bc 100644 --- a/src/pyobo/xrefdb/__init__.py +++ b/src/pyobo/xrefdb/__init__.py @@ -1,3 +1 @@ -# -*- coding: utf-8 -*- - """Extraction of mappings from OBO documents.""" diff --git a/src/pyobo/xrefdb/canonicalizer.py b/src/pyobo/xrefdb/canonicalizer.py index dbd76aa4..6e0d7c93 100644 --- a/src/pyobo/xrefdb/canonicalizer.py +++ b/src/pyobo/xrefdb/canonicalizer.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Tools for canonicalizing a CURIE based on a priority list.""" +from collections.abc import Iterable, Mapping from dataclasses import dataclass, field from functools import lru_cache -from typing import Iterable, List, Mapping, Optional, Set, Tuple +from typing import Optional import networkx as nx import pandas as pd @@ -34,7 +33,7 @@ class Canonicalizer: graph: nx.Graph #: A list of prefixes. The ones with the lower index are higher priority - priority: Optional[List[str]] = None + priority: Optional[list[str]] = None #: Longest length paths allowed cutoff: int = 5 @@ -54,7 +53,7 @@ def _key(self, curie: str) -> Optional[int]: def _get_priority_dict(self, curie: str) -> Mapping[str, int]: return dict(self._iterate_priority_targets(curie)) - def _iterate_priority_targets(self, curie: str) -> Iterable[Tuple[str, int]]: + def _iterate_priority_targets(self, curie: str) -> Iterable[tuple[str, int]]: for target in nx.single_source_shortest_path(self.graph, curie, cutoff=self.cutoff): priority = self._key(target) if priority is not None: @@ -79,20 +78,20 @@ def get_default(cls, priority: Optional[Iterable[str]] = None) -> "Canonicalizer return cls._get_default_helper(priority=priority) @classmethod - @lru_cache() - def _get_default_helper(cls, priority: Optional[Tuple[str, ...]] = None) -> "Canonicalizer": + @lru_cache + def _get_default_helper(cls, priority: Optional[tuple[str, ...]] = None) -> "Canonicalizer": """Help get the default canonicalizer.""" graph = cls._get_default_graph() return cls(graph=graph, priority=list(priority) if priority else None) @staticmethod - @lru_cache() + @lru_cache def _get_default_graph() -> nx.Graph: df = resource_utils.ensure_inspector_javert_df() graph = get_graph_from_xref_df(df) return graph - def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[Tuple[str, str]]: + def iterate_flat_mapping(self, use_tqdm: bool = True) -> Iterable[tuple[str, str]]: """Iterate over the canonical mapping from all nodes to their canonical CURIEs.""" nodes = self.graph.nodes() if use_tqdm: @@ -114,13 +113,13 @@ def single_source_shortest_path( self, curie: str, cutoff: Optional[int] = None, - ) -> Optional[Mapping[str, List[Mapping[str, str]]]]: + ) -> Optional[Mapping[str, list[Mapping[str, str]]]]: """Get all shortest paths between given entity and its equivalent entities.""" return single_source_shortest_path(graph=self.graph, 
curie=curie, cutoff=cutoff) def all_shortest_paths( self, source_curie: str, target_curie: str - ) -> List[List[Mapping[str, str]]]: + ) -> list[list[Mapping[str, str]]]: """Get all shortest paths between the two entities.""" return all_shortest_paths( graph=self.graph, source_curie=source_curie, target_curie=target_curie @@ -134,11 +133,14 @@ def from_df(cls, df: pd.DataFrame) -> "Canonicalizer": def all_shortest_paths( graph: nx.Graph, source_curie: str, target_curie: str -) -> List[List[Mapping[str, str]]]: +) -> list[list[Mapping[str, str]]]: """Get all shortest paths between the two CURIEs.""" _paths = nx.all_shortest_paths(graph, source=source_curie, target=target_curie) return [ - [dict(source=s, target=t, provenance=graph[s][t]["source"]) for s, t in pairwise(_path)] + [ + {"source": s, "target": t, "provenance": graph[s][t]["source"]} + for s, t in pairwise(_path) + ] for _path in _paths ] @@ -147,7 +149,7 @@ def single_source_shortest_path( graph: nx.Graph, curie: str, cutoff: Optional[int] = None, -) -> Optional[Mapping[str, List[Mapping[str, str]]]]: +) -> Optional[Mapping[str, list[Mapping[str, str]]]]: """Get the shortest path from the CURIE to all elements of its equivalence class. Things that didn't work: @@ -156,7 +158,9 @@ def single_source_shortest_path( ------------ .. code-block:: python - for curies in tqdm(nx.connected_components(graph), desc='filling connected components', unit_scale=True): + for curies in tqdm( + nx.connected_components(graph), desc="filling connected components", unit_scale=True + ): for c1, c2 in itt.combinations(curies, r=2): if not graph.has_edge(c1, c2): graph.add_edge(c1, c2, inferred=True) @@ -165,7 +169,9 @@ def single_source_shortest_path( ------------ .. code-block:: python - for curie in tqdm(graph, total=graph.number_of_nodes(), desc='mapping connected components', unit_scale=True): + for curie in tqdm( + graph, total=graph.number_of_nodes(), desc="mapping connected components", unit_scale=True + ): for incident_curie in nx.node_connected_component(graph, curie): if not graph.has_edge(curie, incident_curie): graph.add_edge(curie, incident_curie, inferred=True) @@ -177,13 +183,16 @@ def single_source_shortest_path( return None rv = nx.single_source_shortest_path(graph, curie, cutoff=cutoff) return { - k: [dict(source=s, target=t, provenance=graph[s][t]["provenance"]) for s, t in pairwise(v)] + k: [ + {"source": s, "target": t, "provenance": graph[s][t]["provenance"]} + for s, t in pairwise(v) + ] for k, v in rv.items() if k != curie # don't map to self } -def get_equivalent(curie: str, cutoff: Optional[int] = None) -> Set[str]: +def get_equivalent(curie: str, cutoff: Optional[int] = None) -> set[str]: """Get equivalent CURIEs.""" canonicalizer = Canonicalizer.get_default() r = canonicalizer.single_source_shortest_path(curie=curie, cutoff=cutoff) diff --git a/src/pyobo/xrefdb/priority.py b/src/pyobo/xrefdb/priority.py index aa55f2fe..32e67ec1 100644 --- a/src/pyobo/xrefdb/priority.py +++ b/src/pyobo/xrefdb/priority.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Configuration for the default priority list.""" import bioregistry diff --git a/src/pyobo/xrefdb/sources/__init__.py b/src/pyobo/xrefdb/sources/__init__.py index 485a4c05..3a0a0d42 100644 --- a/src/pyobo/xrefdb/sources/__init__.py +++ b/src/pyobo/xrefdb/sources/__init__.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- - """Sources of xrefs not from OBO.""" import logging +from collections.abc import Iterable, Mapping from functools import lru_cache -from typing import Callable, 
Iterable, Mapping, Optional +from typing import Callable, Optional import pandas as pd from class_resolver import FunctionResolver @@ -22,7 +21,7 @@ XrefGetter = Callable[[], pd.DataFrame] -@lru_cache() +@lru_cache def _get_xref_plugins() -> Mapping[str, XrefGetter]: resolver: FunctionResolver[XrefGetter] = FunctionResolver.from_entrypoint("pyobo.xrefs") return resolver.lookup_dict diff --git a/src/pyobo/xrefdb/sources/biomappings.py b/src/pyobo/xrefdb/sources/biomappings.py index 5a40bb93..405f103a 100644 --- a/src/pyobo/xrefdb/sources/biomappings.py +++ b/src/pyobo/xrefdb/sources/biomappings.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Get the Biomappings manually curated equivalences.""" import pandas as pd diff --git a/src/pyobo/xrefdb/sources/cbms2019.py b/src/pyobo/xrefdb/sources/cbms2019.py index 8649b115..70fd00de 100644 --- a/src/pyobo/xrefdb/sources/cbms2019.py +++ b/src/pyobo/xrefdb/sources/cbms2019.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Cross references from cbms2019. .. seealso:: https://github.com/pantapps/cbms2019 diff --git a/src/pyobo/xrefdb/sources/chembl.py b/src/pyobo/xrefdb/sources/chembl.py index a3b04b6c..efb488b8 100644 --- a/src/pyobo/xrefdb/sources/chembl.py +++ b/src/pyobo/xrefdb/sources/chembl.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Get ChEMBL xrefs.""" from typing import Optional diff --git a/src/pyobo/xrefdb/sources/compath.py b/src/pyobo/xrefdb/sources/compath.py index f238d9a6..4de3e04f 100644 --- a/src/pyobo/xrefdb/sources/compath.py +++ b/src/pyobo/xrefdb/sources/compath.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Import ComPath mappings between pathways.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd from pystow.utils import get_commit diff --git a/src/pyobo/xrefdb/sources/famplex.py b/src/pyobo/xrefdb/sources/famplex.py index 78155a42..d922ef01 100644 --- a/src/pyobo/xrefdb/sources/famplex.py +++ b/src/pyobo/xrefdb/sources/famplex.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- - """Get FamPlex xrefs.""" import logging +from collections.abc import Mapping from functools import lru_cache -from typing import Mapping, Tuple import bioregistry import pandas as pd @@ -50,8 +48,8 @@ def get_famplex_xrefs_df(force: bool = False) -> pd.DataFrame: return df -@lru_cache() -def get_remapping(force: bool = False) -> Mapping[Tuple[str, str], Tuple[str, str, str]]: +@lru_cache +def get_remapping(force: bool = False) -> Mapping[tuple[str, str], tuple[str, str, str]]: """Get a mapping from database/identifier pairs to famplex identifiers.""" df = _get_famplex_df(force=force) rv = {} diff --git a/src/pyobo/xrefdb/sources/gilda.py b/src/pyobo/xrefdb/sources/gilda.py index a9cc6042..3754785e 100644 --- a/src/pyobo/xrefdb/sources/gilda.py +++ b/src/pyobo/xrefdb/sources/gilda.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Cross references from Gilda. .. seealso:: https://github.com/indralabs/gilda diff --git a/src/pyobo/xrefdb/sources/intact.py b/src/pyobo/xrefdb/sources/intact.py index 6f9d9341..e4436516 100644 --- a/src/pyobo/xrefdb/sources/intact.py +++ b/src/pyobo/xrefdb/sources/intact.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Get the xrefs from IntAct.""" -from typing import Mapping +from collections.abc import Mapping import pandas as pd @@ -49,7 +47,8 @@ def get_complexportal_mapping() -> Mapping[str, str]: .. 
code-block:: python from pyobo import get_filtered_xrefs - intact_complexportal_mapping = get_filtered_xrefs('intact', 'complexportal') + + intact_complexportal_mapping = get_filtered_xrefs("intact", "complexportal") """ @cached_mapping( @@ -87,7 +86,8 @@ def get_reactome_mapping() -> Mapping[str, str]: .. code-block:: python from pyobo import get_filtered_xrefs - intact_complexportal_mapping = get_filtered_xrefs('intact', 'reactome') + + intact_complexportal_mapping = get_filtered_xrefs("intact", "reactome") """ @cached_mapping( diff --git a/src/pyobo/xrefdb/sources/ncit.py b/src/pyobo/xrefdb/sources/ncit.py index 57db5192..8dd8f098 100644 --- a/src/pyobo/xrefdb/sources/ncit.py +++ b/src/pyobo/xrefdb/sources/ncit.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- - """Import NCIT mappings.""" -from typing import Iterable +from collections.abc import Iterable import pandas as pd diff --git a/src/pyobo/xrefdb/sources/pubchem.py b/src/pyobo/xrefdb/sources/pubchem.py index 09262a48..9aa88787 100644 --- a/src/pyobo/xrefdb/sources/pubchem.py +++ b/src/pyobo/xrefdb/sources/pubchem.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- - """Get xrefs from PubChem Compound to MeSH.""" from typing import Optional import pandas as pd -from ...api.utils import get_version +from ...api.utils import safe_get_version from ...constants import XREF_COLUMNS from ...sources.pubchem import _get_pubchem_extras_url, get_pubchem_id_to_mesh_id @@ -18,8 +16,7 @@ def get_pubchem_mesh_df(version: Optional[str] = None) -> pd.DataFrame: """Get PubChem Compound-MeSH xrefs.""" if version is None: - version = get_version("pubchem") - assert version is not None + version = safe_get_version("pubchem") cid_mesh_url = _get_pubchem_extras_url(version, "CID-MeSH") return pd.DataFrame( [ diff --git a/src/pyobo/xrefdb/sources/wikidata.py b/src/pyobo/xrefdb/sources/wikidata.py index 12069e92..6a9f98ba 100644 --- a/src/pyobo/xrefdb/sources/wikidata.py +++ b/src/pyobo/xrefdb/sources/wikidata.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Get Wikidata xrefs. Run with ``python -m pyobo.xrefdb.sources.wikidata``. @@ -7,7 +5,7 @@ import json import logging -from typing import Iterable, Tuple +from collections.abc import Iterable import bioregistry import click @@ -68,7 +66,7 @@ def get_wikidata_df(prefix: str, wikidata_property: str) -> pd.DataFrame: def iter_wikidata_mappings( wikidata_property: str, *, cache: bool = True -) -> Iterable[Tuple[str, str]]: +) -> Iterable[tuple[str, str]]: """Iterate over Wikidata xrefs.""" path = WIKIDATA_MAPPING_DIRECTORY.join(name=f"{wikidata_property}.json") if path.exists() and cache: diff --git a/src/pyobo/xrefdb/xrefs_pipeline.py b/src/pyobo/xrefdb/xrefs_pipeline.py index a95e6098..1b3776a4 100644 --- a/src/pyobo/xrefdb/xrefs_pipeline.py +++ b/src/pyobo/xrefdb/xrefs_pipeline.py @@ -1,11 +1,10 @@ -# -*- coding: utf-8 -*- - """Pipeline for extracting all xrefs from OBO documents available.""" import gzip import itertools as itt import logging -from typing import Iterable, Optional, Tuple, cast +from collections.abc import Iterable +from typing import Optional, cast import bioregistry import networkx as nx @@ -83,7 +82,7 @@ def _iter_metadata(**kwargs): yield prefix, version, data["date"], bioregistry.is_deprecated(prefix) -def _iter_names(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, str]]: +def _iter_names(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]: """Iterate over all prefix-identifier-name triples we can get. :param leave: should the tqdm be left behind? 
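An aside on the @cached_mapping decorator used for the IntAct mappings earlier in this patch (and defined in src/pyobo/utils/cache.py above): the first call runs the wrapped function and writes the result to a two-column TSV, and later calls load that file instead of recomputing. A minimal sketch, with a hypothetical file name under the intact prefix:

.. code-block:: python

    from collections.abc import Mapping

    from pyobo.utils.cache import cached_mapping
    from pyobo.utils.path import prefix_directory_join

    # hypothetical cache location under the intact prefix
    path = prefix_directory_join("intact", name="demo_mapping.tsv")


    @cached_mapping(path=path, header=["intact_id", "complexportal_id"])
    def get_demo_mapping() -> Mapping[str, str]:
        # the expensive work only runs when the TSV does not exist yet
        return {"EBI-1": "CPX-1"}


    mapping = get_demo_mapping()  # computes and caches on the first call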
@@ -100,13 +99,13 @@ def _iter_names(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, str]] yield pubchem.PREFIX, identifier, name -def _iter_species(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, str]]: +def _iter_species(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]: """Iterate over all prefix-identifier-species triples we can get.""" yield from iter_helper(get_id_species_mapping, leave=leave, **kwargs) # TODO ncbigene -def _iter_definitions(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, str]]: +def _iter_definitions(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]: """Iterate over all prefix-identifier-descriptions triples we can get.""" yield from iter_helper(get_id_definition_mapping, leave=leave, **kwargs) yield from _iter_ncbigene(1, 8) @@ -114,7 +113,7 @@ def _iter_definitions(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, def _iter_alts( leave: bool = False, strict: bool = True, **kwargs -) -> Iterable[Tuple[str, str, str]]: +) -> Iterable[tuple[str, str, str]]: for prefix, identifier, alts in iter_helper( get_id_to_alts, leave=leave, strict=strict, **kwargs ): @@ -122,7 +121,7 @@ def _iter_alts( yield prefix, identifier, alt -def _iter_synonyms(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, str]]: +def _iter_synonyms(leave: bool = False, **kwargs) -> Iterable[tuple[str, str, str]]: """Iterate over all prefix-identifier-synonym triples we can get. :param leave: should the tqdm be left behind? @@ -132,26 +131,26 @@ def _iter_synonyms(leave: bool = False, **kwargs) -> Iterable[Tuple[str, str, st yield prefix, identifier, synonym -def _iter_typedefs(**kwargs) -> Iterable[Tuple[str, str, str, str]]: +def _iter_typedefs(**kwargs) -> Iterable[tuple[str, str, str, str]]: """Iterate over all prefix-identifier-name triples we can get.""" for prefix, df in iter_helper_helper(get_typedef_df, **kwargs): for t in df.values: if all(t): - yield cast(Tuple[str, str, str, str], (prefix, *t)) + yield cast(tuple[str, str, str, str], (prefix, *t)) -def _iter_relations(**kwargs) -> Iterable[Tuple[str, str, str, str, str, str]]: +def _iter_relations(**kwargs) -> Iterable[tuple[str, str, str, str, str, str]]: for prefix, df in iter_helper_helper(get_relations_df, **kwargs): for t in df.values: if all(t): - yield cast(Tuple[str, str, str, str, str, str], (prefix, *t)) + yield cast(tuple[str, str, str, str, str, str], (prefix, *t)) -def _iter_properties(**kwargs) -> Iterable[Tuple[str, str, str, str]]: +def _iter_properties(**kwargs) -> Iterable[tuple[str, str, str, str]]: for prefix, df in iter_helper_helper(get_properties_df, **kwargs): for t in df.values: if all(t): - yield cast(Tuple[str, str, str, str], (prefix, *t)) + yield cast(tuple[str, str, str, str], (prefix, *t)) def _iter_xrefs( @@ -161,7 +160,7 @@ def _iter_xrefs( skip_below: Optional[str] = None, strict: bool = True, **kwargs, -) -> Iterable[Tuple[str, str, str, str, str]]: +) -> Iterable[tuple[str, str, str, str, str]]: it = iter_helper_helper( get_xrefs_df, use_tqdm=use_tqdm, @@ -175,7 +174,7 @@ def _iter_xrefs( for row in df.values: if any(not element for element in row): continue - yield cast(Tuple[str, str, str, str, str], (prefix, *row, prefix)) + yield cast(tuple[str, str, str, str, str], (prefix, *row, prefix)) for df in iter_xref_plugins(skip_below=skip_below): df.dropna(inplace=True) yield from tqdm(df.values, leave=False, total=len(df.index), unit_scale=True) diff --git a/tests/__init__.py b/tests/__init__.py index 48407ae8..6f3861dd 
100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1 @@ -# -*- coding: utf-8 -*- - """Tests for PyOBO.""" diff --git a/tests/constants.py b/tests/constants.py index 99ed4889..0442ea7c 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Constants for tests for PyOBO.""" import pathlib diff --git a/tests/test_alt_ids.py b/tests/test_alt_ids.py index e99d69d5..20dea8d5 100644 --- a/tests/test_alt_ids.py +++ b/tests/test_alt_ids.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for alternative identifiers.""" import unittest diff --git a/tests/test_caches.py b/tests/test_caches.py index 7c476e60..9b4c4c52 100644 --- a/tests/test_caches.py +++ b/tests/test_caches.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for PyOBO caches.""" import os @@ -25,7 +23,7 @@ def test_mapping(self): @cached_mapping(path=path, header=header) def _get_mapping(): time.sleep(sleep_time) - return dict(a="x", b="y", c="z") + return {"a": "x", "b": "y", "c": "z"} start_time = time.time() rv1 = _get_mapping() @@ -50,7 +48,7 @@ def _get_mapping(): def _help_test_mapping(self, d): self.assertIsNotNone(d) self.assertEqual(3, len(d)) - self.assertEqual(dict(a="x", b="y", c="z"), d) + self.assertEqual({"a": "x", "b": "y", "c": "z"}, d) def test_multidict(self): """Test caching a multidict.""" @@ -61,7 +59,7 @@ def test_multidict(self): @cached_multidict(path=path, header=header) def _get_multidict(): time.sleep(sleep_time) - return dict(a=["a1", "a2"], b=["b1"], c=["c1", "c2"]) + return {"a": ["a1", "a2"], "b": ["b1"], "c": ["c1", "c2"]} start_time = time.time() rv1 = _get_multidict() @@ -85,4 +83,4 @@ def _get_multidict(): def _help_test_multidict(self, d): self.assertIsNotNone(d) self.assertEqual(3, len(d)) - self.assertEqual(dict(a=["a1", "a2"], b=["b1"], c=["c1", "c2"]), d) + self.assertEqual({"a": ["a1", "a2"], "b": ["b1"], "c": ["c1", "c2"]}, d) diff --git a/tests/test_extract.py b/tests/test_extract.py index b47e1d83..56ea646e 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for PyOBO.""" import unittest diff --git a/tests/test_get.py b/tests/test_get.py index 8db9d087..5cc2ad18 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for getting OBO.""" import unittest @@ -188,7 +186,7 @@ def test_get_node_properties(self): t_prop = "http://purl.obolibrary.org/obo/chebi/monoisotopicmass" self.assertIn(t_prop, {prop for prop, value in properties}) self.assertEqual(1, sum(prop == t_prop for prop, value in properties)) - value = [value for prop, value in properties if prop == t_prop][0] + value = next(value for prop, value in properties if prop == t_prop) self.assertEqual("261.28318", value) def test_get_node_parents(self): diff --git a/tests/test_gmt.py b/tests/test_gmt.py index 52ba65b7..6b95a9b8 100644 --- a/tests/test_gmt.py +++ b/tests/test_gmt.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """GMT tests.""" import unittest diff --git a/tests/test_ground.py b/tests/test_ground.py index f244e933..d3d348eb 100644 --- a/tests/test_ground.py +++ b/tests/test_ground.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for PyOBO grounding.""" import unittest diff --git a/tests/test_mapper.py b/tests/test_mapper.py index b59bd779..dbd2ad06 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Test mapping program.""" import unittest diff --git a/tests/test_sources/__init__.py 
b/tests/test_sources/__init__.py index 9072dbbd..6082487c 100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -1,3 +1 @@ -# -*- coding: utf-8 -*- - """Tests for sources in PyOBO.""" diff --git a/tests/test_sources/test_famplex.py b/tests/test_sources/test_famplex.py index 65a896e1..532c4e21 100644 --- a/tests/test_sources/test_famplex.py +++ b/tests/test_sources/test_famplex.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for famplex.""" import unittest diff --git a/tests/test_struct.py b/tests/test_struct.py index 29adbc6a..d65e9a36 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Tests for the OBO data structures.""" import unittest diff --git a/tests/test_utils.py b/tests/test_utils.py index bc4e8bae..e990013f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Test iteration tools.""" import unittest diff --git a/tests/test_version_pins.py b/tests/test_version_pins.py index 1fb04a42..107fa454 100644 --- a/tests/test_version_pins.py +++ b/tests/test_version_pins.py @@ -1,6 +1,5 @@ -# -*- coding: utf-8 -*- - """Tests for PyOBO version pins.""" + import os import unittest from unittest import mock diff --git a/tox.ini b/tox.ini index 2b21a3d7..626c0ea9 100644 --- a/tox.ini +++ b/tox.ini @@ -4,80 +4,102 @@ # and then run "tox" from this directory. [tox] -isolated_build = true +# To use a PEP 517 build-backend you are required to configure tox to use an isolated_build: +# https://tox.readthedocs.io/en/latest/example/package.html +isolated_build = True + +# These environments are run in order if you just use `tox`: envlist = # always keep coverage-clean first - # coverage-clean - # code linters/stylers + coverage-clean + # code formatters + format + # format-docs + # Code quality assessment manifest pyroma lint - flake8 - # mypy - # documentation linters/checkers + mypy + # Documentation quality assurance doc8 - readme - docs + docstr-coverage + docs-test # the actual tests py - py + doctests # always keep coverage-report last # coverage-report [testenv] -commands = coverage run -p -m pytest --durations=20 {posargs:tests} +description = Run unit and integration tests. +# Runs on the "tests" directory by default, or passes the positional +# arguments from `tox -e py ... +commands = + coverage run -p -m pytest --durations=20 {posargs:tests} + coverage combine + coverage xml extras = + # See the [project.optional-dependencies] entry in pyproject.toml for "tests" tests [testenv:coverage-clean] +description = Remove testing coverage artifacts. deps = coverage skip_install = true commands = coverage erase -[testenv:manifest] -deps = check-manifest -skip_install = true -commands = check-manifest +[testenv:doctests] +description = Test that documentation examples run properly. +commands = + xdoctest -m src +deps = + xdoctest + pygments -[testenv:flake8] -skip_install = true +[testenv:treon] +description = Test that notebooks can run to completion +commands = + treon notebooks/ deps = - # darglint - flake8 - # flake8-bandit - flake8-black - flake8-bugbear - flake8-colors - flake8-docstrings - flake8-isort - flake8-print - pep8-naming - pydocstyle + treon + +[testenv:format] +description = Format the code in a deterministic way using ruff. 
Note that ruff check should come before ruff format when using --fix (ref: https://github.com/astral-sh/ruff-pre-commit/blob/main/README.md) +deps = + ruff +skip_install = true commands = - flake8 src/pyobo/ tests/ -description = Run the flake8 tool with several plugins (bandit, docstrings, import order, pep8 naming). + ruff check --fix + ruff format -[testenv:lint] +[testenv:format-docs] +description = Run documentation linters. +# note that this doesn't work with sphinx-click +# or any other extension that adds extra directives deps = - black - isort + rstfmt +extras = + # See the [project.optional-dependencies] entry in pyproject.toml for "docs" + docs skip_install = true commands = - black . - isort . -description = Run black and isort to keep the code looking spiffy + rstfmt docs/source/ + +[testenv:manifest] +deps = check-manifest +skip_install = true +commands = check-manifest +description = Check that the MANIFEST.in is written properly and give feedback on how to fix it. + +[testenv:lint] +description = Check code quality using ruff and other tools. -[testenv:darglint] skip_install = true deps = - flake8 - flake8-colors - darglint + ruff commands = - flake8 \ - src/pyobo/getters.py \ - src/pyobo/api/ -description = Run the flake8 darglint tool. + ruff check + ruff format --check [testenv:pyroma] deps = @@ -88,42 +110,45 @@ commands = pyroma --min=10 . description = Run the pyroma tool to check the package friendliness of the project. [testenv:mypy] +description = Run the mypy tool to check static typing on the project. deps = mypy - types-requests + pydantic types-tabulate - types-setuptools + types-requests skip_install = true -commands = mypy --install-types --non-interactive --ignore-missing-imports src/pyobo/ -description = Run the mypy tool to check static typing on the project. +commands = mypy --install-types --non-interactive --ignore-missing-imports src/ +# TODO make strict! remove skip_install [testenv:doc8] skip_install = true deps = - sphinx<8.0 doc8 +extras = + docs commands = - doc8 docs/source/ README.rst + doc8 docs/source/ description = Run the doc8 tool to check the style of the RST files in the project docs. -[testenv:readme] -commands = rst-lint README.rst +[testenv:docstr-coverage] +description = Run the docstr-coverage tool to check documentation coverage. skip_install = true deps = - restructuredtext_lint - pygments -description = Run the rst-lint tool to check the style of the README. + docstr-coverage +commands = + docstr-coverage src/ tests/ --skip-private --skip-magic [testenv:docs] -description = Build the documentation locally. +description = Build the documentation locally, allowing warnings. extras = + # See the [project.optional-dependencies] entry in pyproject.toml for "docs" docs - agrovoc + # You might need to add additional extras if your documentation covers it commands = - python -m sphinx -W -b html -d docs/build/doctrees docs/source docs/build/html + python -m sphinx -b html -d docs/build/doctrees docs/source docs/build/html [testenv:docs-test] -description = Test building the documentation in an isolated environment. +description = Test building the documentation in an isolated environment. Warnings are considered as errors via -W. 
 changedir = docs
 extras =
     {[testenv:docs]extras}
@@ -131,19 +156,16 @@ commands =
     mkdir -p {envtmpdir}
     cp -r source {envtmpdir}/source
     python -m sphinx -W -b html -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/html
-    ; python -m sphinx -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage
-    ; cat {envtmpdir}/build/coverage/c.txt
-    ; cat {envtmpdir}/build/coverage/python.txt
+    # python -m sphinx -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage
+    # cat {envtmpdir}/build/coverage/c.txt
+    # cat {envtmpdir}/build/coverage/python.txt
 allowlist_externals =
-    /bin/cp
-    /bin/cat
-    /bin/mkdir
-    # for compatibility on GitHub actions
-    /usr/bin/cp
-    /usr/bin/cat
-    /usr/bin/mkdir
+    cp
+    cat
+    mkdir
 
 [testenv:coverage-report]
+# TODO this is broken
 deps = coverage
 skip_install = true
 commands =
@@ -155,29 +177,58 @@ commands =
 
 ####################
 
 [testenv:bumpversion]
-commands = bump2version {posargs}
+description = Bump the version number
+commands = bump-my-version bump {posargs}
 skip_install = true
 passenv = HOME
 deps =
-    bump2version
+    bump-my-version
 
 [testenv:bumpversion-release]
-commands = bump2version release --tag
+description = Remove the -dev tag from the version
+commands = bump-my-version bump release --tag
 skip_install = true
 passenv = HOME
 deps =
-    bump2version
+    bump-my-version
 
 [testenv:build]
 skip_install = true
 deps =
-    wheel
-    build
+    uv
     setuptools
 commands =
-    python -m build --sdist --wheel --no-isolation
+    uv build --sdist --wheel --no-build-isolation
+
+############
+# Releases #
+############
+
+# In order to make a release to PyPI, you'll need to take the following steps:
+#
+# 1. Navigate to https://pypi.org/account/register/ to register for PyPI
+# 2. Navigate to https://pypi.org/manage/account/ and request to re-send a verification email.
+#    This is not sent by default, and is required to set up 2-Factor Authentication.
+# 3. Get account recovery codes
+# 4. Set up 2-Factor Authentication
+# 5. Get an API token from https://pypi.org/manage/account/token/
+# 6. Create a file called .pypirc in the home directory if it does not already exist.
+# 7. Add the following content to the .pypirc file
+#
+#    [distutils]
+#    index-servers=
+#        pypi
+#        testpypi
+#
+#    [pypi]
+#    username = __token__
+#    password = <your API token>
+#
+# If there's already an `index-servers =` list, just make sure you add `pypi` to it.
+# More information about .pypirc can be found at https://packaging.python.org/en/latest/specifications/pypirc/
 
 [testenv:release]
+description = Release the code to PyPI so users can pip install it
 skip_install = true
 passenv =
     TWINE_USERNAME
@@ -187,9 +238,12 @@ deps =
     twine >= 1.5.0
 commands =
     {[testenv:build]commands}
-    twine upload --non-interactive --skip-existing dist/*
+    twine upload --skip-existing dist/*
 
 [testenv:finish]
+description =
+    Run a workflow that removes -dev from the version, creates a tagged release on GitHub,
+    creates a release on PyPI, and bumps the version again.
 skip_install = true
 passenv =
     HOME
@@ -197,12 +251,74 @@ passenv =
     TWINE_PASSWORD
 deps =
     {[testenv:release]deps}
-    bump2version
+    bump-my-version
 commands =
-    bump2version release --tag
+    {[testenv:bumpversion-release]commands}
     {[testenv:release]commands}
     git push --tags
-    bump2version patch
+    bump-my-version bump patch
+    git push
+allowlist_externals =
+    git
+
+#################
+# Test Releases #
+#################
+
+# In order to test making a release to Test PyPI, you'll need to take the following steps:
+#
+# 1. Navigate to https://test.pypi.org/account/register/ to register for Test PyPI
+# 2. Navigate to https://test.pypi.org/manage/account/ and request to re-send a verification email.
+#    This is not sent by default, and is required to set up 2-Factor Authentication.
+# 3. Get account recovery codes
+# 4. Set up 2-Factor Authentication
+# 5. Get an API token from https://test.pypi.org/manage/account/token/
+# 6. Create a file called .pypirc in the home directory if it does not already exist.
+# 7. Add the following content to the .pypirc file
+#
+#    [distutils]
+#    index-servers=
+#        pypi
+#        testpypi
+#
+#    [testpypi]
+#    repository = https://test.pypi.org/legacy/
+#    username = __token__
+#    password = <your API token>
+#
+# If there's already an `index-servers =` list, just make sure you add `testpypi` to it.
+# More information about .pypirc can be found at https://packaging.python.org/en/latest/specifications/pypirc/
+
+[testenv:testrelease]
+description = Release the code to the test PyPI site
+skip_install = true
+passenv =
+    TWINE_USERNAME
+    TWINE_PASSWORD
+deps =
+    {[testenv:build]deps}
+    twine >= 1.5.0
+commands =
+    {[testenv:build]commands}
+    twine upload --skip-existing --repository testpypi dist/*
+
+[testenv:testfinish]
+description =
+    Run a workflow that removes -dev from the version, creates a tagged release on GitHub,
+    creates a release on Test PyPI, and bumps the version again.
+skip_install = true
+passenv =
+    HOME
+    TWINE_USERNAME
+    TWINE_PASSWORD
+deps =
+    {[testenv:testrelease]deps}
+    bump-my-version
+commands =
+    {[testenv:bumpversion-release]commands}
+    {[testenv:testrelease]commands}
+    git push --tags
+    bump-my-version bump patch
     git push
 allowlist_externals =
-    /usr/bin/git
+    git
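
Each environment above can also be run on its own with tox's -e flag. A minimal
console sketch of the intended workflow, assuming tox is installed and (for the
release environments) a ~/.pypirc configured as described in the comments above:

    tox                 # run the full envlist in order (coverage-clean, format, ..., py, doctests)
    tox -e lint         # check code quality with ruff without modifying files
    tox -e docs         # build the Sphinx documentation locally
    tox -e testfinish   # rehearse the release workflow against Test PyPI
    tox -e finish       # remove -dev, tag, upload to PyPI, and bump to the next patch version

Note that the finish and testfinish workflows invoke `git push`, so they assume
a git remote with push access is configured in the working checkout.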