Skip to content
This repository has been archived by the owner on Oct 27, 2023. It is now read-only.

refactor ~bacdive~ => microbe #8

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .cruft.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"checkout": null,
"context": {
"cookiecutter": {
"project_name": "kg-bacdive",
"__project_slug": "kg_bacdive",
"project_name": "kg-microbe",
"__project_slug": "kg_microbe",
"project_description": "Knowledge graph construction for BacDive data",
"min_python_version": "3.9",
"full_name": "Harshad Hegde",
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ jobs:

- name: Build documentation.
run: |
echo ${{ secrets.GH_TOKEN }} >> kg_bacdive/token.txt
echo ${{ secrets.GH_TOKEN }} >> kg_microbe/token.txt
mkdir gh-pages
touch gh-pages/.nojekyll
cd docs/
poetry run sphinx-apidoc -o . ../kg_bacdive/ --ext-autodoc -f
poetry run sphinx-apidoc -o . ../kg_microbe/ --ext-autodoc -f
poetry run sphinx-build -b html . _build
cp -r _build/* ../gh-pages/

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/qc.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: kg-bacdive QC
name: kg-microbe QC

on:
push:
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ data/raw/robot
!data/raw/.keep
!data/raw/exclusion_branches.tsv
!data/raw/nlp/*
kg_bacdive/transform_utils/*/tmp/*/*.yaml
kg_microbe/transform_utils/*/tmp/*/*.yaml
data/transformed/ontologies/*.tsv
*.sqlite
data/merged/*
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# kg-bacdive
# kg-microbe

Knowledge graph construction for BacDive data

# Setup
- Create a virtual environment of your choice.
- Install poetry using `pip install poetry`
- `poetry install`
- `git clone https://github.com/Knowledge-Graph-Hub/kg-bacdive.git`
- `cd kg-bacdive`
- `git clone https://github.com/Knowledge-Graph-Hub/kg-microbe.git`
- `cd kg-microbe`

## Download resources needed
- `poetry run kg download` : This will download the resources needed for this project.
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
import sys
from datetime import date
from kg_bacdive import __version__
from kg_microbe import __version__
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

Expand Down
2 changes: 1 addition & 1 deletion docs/modules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ kg-microbe
.. toctree::
:maxdepth: 4

kg_bacdive
kg_microbe
query_utils
run
setup
2 changes: 1 addition & 1 deletion kg_bacdive/__init__.py → kg_microbe/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""kg-bacdive package."""
"""kg-microbe package."""
from importlib import metadata

from .download import download
Expand Down
File renamed without changes.
File renamed without changes.
10 changes: 5 additions & 5 deletions kg_bacdive/run.py → kg_microbe/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

import click

from kg_bacdive import download as kg_download
from kg_bacdive.merge_utils.merge_kg import load_and_merge
from kg_bacdive.query import parse_query_yaml, result_dict_to_tsv, run_query
from kg_bacdive.transform import DATA_SOURCES
from kg_bacdive.transform import transform as kg_transform
from kg_microbe import download as kg_download
from kg_microbe.merge_utils.merge_kg import load_and_merge
from kg_microbe.query import parse_query_yaml, result_dict_to_tsv, run_query
from kg_microbe.transform import DATA_SOURCES
from kg_microbe.transform import transform as kg_transform


@click.group()
Expand Down
10 changes: 5 additions & 5 deletions kg_bacdive/transform.py → kg_microbe/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from pathlib import Path
from typing import List, Optional

from kg_bacdive.transform_utils.bacdive.bacdive import BacDiveTransform
from kg_bacdive.transform_utils.mediadive.mediadive import MediaDiveTransform
from kg_bacdive.transform_utils.ontology.ontology_transform import ONTOLOGIES, OntologyTransform
from kg_bacdive.transform_utils.traits.traits import TraitsTransform
from kg_microbe.transform_utils.bacdive.bacdive import BacDiveTransform
from kg_microbe.transform_utils.mediadive.mediadive import MediaDiveTransform
from kg_microbe.transform_utils.ontology.ontology_transform import ONTOLOGIES, OntologyTransform
from kg_microbe.transform_utils.traits.traits import TraitsTransform

DATA_SOURCES = {
"OntologyTransform": OntologyTransform,
Expand All @@ -30,7 +30,7 @@ def transform(
"""
Transform based on resource and class declared in DATA_SOURCES.

Call scripts in kg_bacdive/transform/[source name]/ to
Call scripts in kg_microbe/transform/[source name]/ to
transform each source into a graph format that
KGX can ingest directly, in either TSV or JSON format:
https://github.com/biolink/kgx/blob/master/data-preparation.md
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from oaklib import get_adapter
from tqdm import tqdm

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
BACDIVE_API_BASE_URL,
BACDIVE_ID_COLUMN,
BACDIVE_MEDIUM_DICT,
Expand Down Expand Up @@ -60,8 +60,8 @@
SPECIES,
STRAIN,
)
from kg_bacdive.transform_utils.transform import Transform
from kg_bacdive.utils.pandas_utils import drop_duplicates
from kg_microbe.transform_utils.transform import Transform
from kg_microbe.utils.pandas_utils import drop_duplicates


class BacDiveTransform(Transform):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from transform_utils.transform import Transform

from kg_bacdive.utils.robot_utils import convert_to_json, extract_convert_to_json
from kg_microbe.utils.robot_utils import convert_to_json, extract_convert_to_json


class YourTransform(Transform):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from oaklib import get_adapter
from tqdm import tqdm

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
CAS_RN_KEY,
CAS_RN_PREFIX,
CHEBI_KEY,
Expand Down Expand Up @@ -71,8 +71,8 @@
SOLUTIONS_COLUMN,
SOLUTIONS_KEY,
)
from kg_bacdive.transform_utils.transform import Transform
from kg_bacdive.utils.pandas_utils import drop_duplicates, establish_transitive_relationship
from kg_microbe.transform_utils.transform import Transform
from kg_microbe.utils.pandas_utils import drop_duplicates, establish_transitive_relationship


class MediaDiveTransform(Transform):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
# from kgx.transformer import Transformer
from kgx.cli.cli_utils import transform

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
EXCLUSION_TERMS_FILE,
NCBITAXON_PREFIX,
ROBOT_REMOVED_SUFFIX,
)
from kg_bacdive.utils.robot_utils import convert_to_json, remove_convert_to_json
from kg_microbe.utils.robot_utils import convert_to_json, remove_convert_to_json

from ..transform import Transform

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import yaml
from oaklib.utilities.ner_utilities import get_exclusion_token_list

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
ACTUAL_TERM_KEY,
BIOLOGICAL_PROCESS,
CARBON_SUBSTRATE_CATEGORY,
Expand Down Expand Up @@ -48,9 +48,9 @@
TROPHICALLY_INTERACTS_WITH,
TYPE_COLUMN,
)
from kg_bacdive.transform_utils.transform import Transform
from kg_bacdive.utils.ner_utils import annotate
from kg_bacdive.utils.pandas_utils import drop_duplicates
from kg_microbe.transform_utils.transform import Transform
from kg_microbe.utils.ner_utils import annotate
from kg_microbe.utils.pandas_utils import drop_duplicates

OUTPUT_FILE_SUFFIX = "_ner.tsv"
STOPWORDS_FN = "stopwords.txt"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import yaml

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
CATEGORY_COLUMN,
DESCRIPTION_COLUMN,
ID_COLUMN,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from oaklib import get_adapter
from oaklib.datamodels.text_annotator import TextAnnotationConfiguration

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
END_COLUMN,
MATCHES_WHOLE_TEXT_COLUMN,
OBJECT_ALIASES_COLUMN,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pandas as pd

from kg_bacdive.transform_utils.constants import OBJECT_COLUMN, PREDICATE_COLUMN, SUBJECT_COLUMN
from kg_microbe.transform_utils.constants import OBJECT_COLUMN, PREDICATE_COLUMN, SUBJECT_COLUMN


def drop_duplicates(file_path: Path):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
from typing import List, Union

from kg_bacdive.transform_utils.constants import (
from kg_microbe.transform_utils.constants import (
ROBOT_REMOVED_SUFFIX,
)

Expand Down
6 changes: 3 additions & 3 deletions merge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ configuration:
# define the type for non-canonical properties for RDF export

merged_graph:
name: kg-bacdive graph
name: kg-microbe graph
source:
ncbitaxon:
name: "NCBITaxon"
Expand Down Expand Up @@ -52,7 +52,7 @@ merged_graph:
operations:
- name: kgx.graph_operations.summarize_graph.generate_graph_stats
args:
graph_name: kg-bacdive graph
graph_name: kg-microbe graph
filename: merged_graph_stats.yaml
node_facet_properties:
- provided_by
Expand All @@ -67,4 +67,4 @@ merged_graph:
# merged-kg-nt:
# format: nt
# compression: gz
# filename: kg_bacdive.nt.gz
# filename: kg_microbe.nt.gz
2 changes: 1 addition & 1 deletion merged_graph_stats.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,7 @@ edge_stats:
source:
- unknown
total_edges: 1341461
graph_name: kg-bacdive graph
graph_name: kg-microbe graph
node_stats:
count_by_category:
biolink:AbstractEntity:
Expand Down
2 changes: 1 addition & 1 deletion notebook/kg_bacdive.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@
"source": [
"from grape import Graph\n",
"\n",
"kg_bacdive = Graph.from_csv(\n",
"kg_microbe = Graph.from_csv(\n",
" # Edges related parameters\n",
"\n",
" ## The path to the edges list tsv\n",
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[tool.poetry]
name = "kg-bacdive"
name = "kg-microbe"
version = "0.0.0"
description = "kg-bacdive"
description = "kg-microbe"
authors = ["Harshad Hegde <[email protected]>"]
license = "MIT"
readme = "README.md"
Expand Down Expand Up @@ -44,7 +44,7 @@ docs = [
]

[tool.poetry.scripts]
kg = 'kg_bacdive.run:main'
kg = 'kg_microbe.run:main'

[tool.poetry-dynamic-versioning]
enable = true
Expand Down
8 changes: 4 additions & 4 deletions templates/README.build
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
The files in this directory represent a build of the kg-bacdive knowledge graph.
The files in this directory represent a build of the kg-microbe knowledge graph.

These files include:
* Jenkinsfile - the exact command run to produce this build of the KG
* kg-bacdive.jnl.gz - a blazegraph journal that can be loaded to produce a Blazegraph endpoint
* kg-bacdive.nt.gz- an ntriples/RDF version of the KG
* kg-bacdive.tar.gz - a tar.gz file containing the KG in KGX TSV format (with 'merged-kg' within the name of both node and edge files)
* kg-microbe.jnl.gz - a blazegraph journal that can be loaded to produce a Blazegraph endpoint
* kg-microbe.nt.gz - an ntriples/RDF version of the KG
* kg-microbe.tar.gz - a tar.gz file containing the KG in KGX TSV format (with 'merged-kg' within the name of both node and edge files)

The subdirectories in this directory are:

Expand Down
2 changes: 1 addition & 1 deletion templates/README.toplevel
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
The subdirectories here each contain a build of the kg-bacdive knowledge graph.
The subdirectories here each contain a build of the kg-microbe knowledge graph.
The directories are named with a date (ISO 8601 format, YYYYMMDD) according to when
the build was produced. The current/ directory is the most recent build.

Expand Down
2 changes: 1 addition & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"""Tests for kg-bacdive."""
"""Tests for kg-microbe."""
2 changes: 1 addition & 1 deletion tests/demo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import unittest

from kg_bacdive import __version__
from kg_microbe import __version__


class TestVersion(unittest.TestCase):
Expand Down
14 changes: 7 additions & 7 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ deps =
ruff
skip_install = true
commands =
black kg_bacdive/ tests/
ruff --fix kg_bacdive/ tests/
black kg_microbe/ tests/
ruff --fix kg_microbe/ tests/
description = Run linters.

# This is used for QC checks.
Expand All @@ -49,8 +49,8 @@ deps =
ruff
skip_install = true
commands =
black --check --diff kg_bacdive/ tests/
ruff check kg_bacdive/ tests/
black --check --diff kg_microbe/ tests/
ruff check kg_microbe/ tests/
description = Run linters.

[testenv:doclint]
Expand All @@ -67,20 +67,20 @@ skip_install = true
deps =
codespell
tomli # required for getting config from pyproject.toml
commands = codespell kg_bacdive/ tests/ -S kg_bacdive/transform_utils/*/tmp/*
commands = codespell kg_microbe/ tests/ -S kg_microbe/transform_utils/*/tmp/*

[testenv:codespell-write]
description = Run spell checker and write corrections.
skip_install = true
deps =
codespell
tomli
commands = codespell kg_bacdive/ tests/ --write-changes -S kg_bacdive/transform_utils/*/tmp/*
commands = codespell kg_microbe/ tests/ --write-changes -S kg_microbe/transform_utils/*/tmp/*

[testenv:docstr-coverage]
skip_install = true
deps =
docstr-coverage
commands =
docstr-coverage kg_bacdive/ tests/ --skip-private --skip-magic
docstr-coverage kg_microbe/ tests/ --skip-private --skip-magic
description = Run the docstr-coverage tool to check documentation coverage