Skip to content

Commit

Permalink
the graphs are stable, the system generates sorted node orders
Browse files Browse the repository at this point in the history
  • Loading branch information
ialbert committed Apr 2, 2024
1 parent 09db87c commit 0413e84
Show file tree
Hide file tree
Showing 12 changed files with 197 additions and 115 deletions.
7 changes: 3 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ web:

# Performs the testing.
test:
(cd test && make test)
pytest

# Runs a linter.
lint:
Expand All @@ -38,12 +38,11 @@ fix:
push:
git commit -am 'saving work' && git push

VERSION ?= $(shell grep '__version__' src/genescape/__about__.py | sed "s/__version__ = \"\\(.*\\)\"/\1/" )
VERSION ?= $(shell grep '__VERSION__' src/genescape/__about__.py | sed "s/__VERSION__ = \"\\(.*\\)\"/\1/" )

NAME = GeneScape

mac:

# Example of generating a tag file with the version
@echo "VERSION=${VERSION}"

Expand All @@ -63,7 +62,7 @@ clean:
realclean: clean
rm -rf build dist

build:
build: test
rm -rf build dist
hatch build
ls -lh dist
Expand Down
9 changes: 6 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,15 @@ all = [
]

[tool.black]
target-version = ["py39"]
target-version = [ 'py310' ]
line-length = 120
skip-string-normalization = true
exclude = '''
src/genescape/bottle\.py
'''

[tool.ruff]
target-version = "py39"
target-version = 'py310'
line-length = 120
exclude = [
# This is a vendored third-party library.
Expand Down Expand Up @@ -159,7 +162,7 @@ ignore = [
"C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",

# Pet peeves
"Q000", "C402", "F401", "E401", "T201",
"Q000", "C402", "F401", "E401", "T201", "E501",
]
unfixable = [
# Don't touch unused imports
Expand Down
2 changes: 1 addition & 1 deletion src/genescape/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.2"
__VERSION__ = "0.8.0"
3 changes: 1 addition & 2 deletions src/genescape/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@

from genescape.__about__ import __version__
from genescape.__about__ import __VERSION__
22 changes: 12 additions & 10 deletions src/genescape/annot.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
"""
Annotates a list of genes with functions based on the GO graph
"""
import csv, io

import csv
import gzip
import sys, json, re
import io
import json
import re
import sys
from collections import Counter
from itertools import *
from pathlib import Path
from genescape import utils, resources

from genescape import resources, utils


def run(data, index, pattern='', mincount=1, root=utils.NS_ALL, csvout=False):

# Collect the run status into this list.
status = {
utils.CODE_FIELD: 0,
utils.INVALID_FIELD: []
}
status = {utils.CODE_FIELD: 0, utils.INVALID_FIELD: []}

# Checking the input
if not index:
Expand Down Expand Up @@ -46,7 +48,7 @@ def build():
go = idx[utils.IDX_OBO]

# The valid ids are the unique gene and protein ids.
valid_ids = set(sym2go) |set(go)
valid_ids = set(sym2go) | set(go)

# Fetch GO functions for a given gene or protein id.
def get_func(name):
Expand All @@ -73,7 +75,7 @@ def get_func(name):
# The missing ids.
if miss:
status[utils.CODE_FIELD] = 1
status[utils.INVALID_FIELD] = (list(miss))
status[utils.INVALID_FIELD] = list(miss)
utils.warn(f"{status}")

# Get the elements for a go id.
Expand Down Expand Up @@ -119,7 +121,7 @@ def go2ns(goid):
if root != utils.NS_ALL and root != root_code:
continue

name = dict(zip(data_fields, [goid, root_code, cnt,func, funcs, cnt, n_size, label]))
name = dict(zip(data_fields, [goid, root_code, cnt, func, funcs, cnt, n_size, label], strict=False))

res.append(name)

Expand Down
24 changes: 12 additions & 12 deletions src/genescape/build.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#
# Parses an OBO file and produces a gzipped JSON file
#
import gzip, os, sys
import json
import csv
import gzip
import json
import os
import sys
from itertools import *
from pathlib import Path
from genescape import utils, resources

from genescape import resources, utils
from genescape.__about__ import __version__


def parse_line(text, sep):
text = text.strip()
elem = text.split(sep)[1].strip().strip('"')
Expand Down Expand Up @@ -50,6 +54,7 @@ def parse_gaf(fname):
stream = csv.reader(stream, delimiter="\t")
return stream


def make_index(obo, gaf, index, with_synonyms=False):

if not os.path.isfile(obo):
Expand Down Expand Up @@ -99,8 +104,8 @@ def make_index(obo, gaf, index, with_synonyms=False):

# Database metadata
meta = dict(
version=__version__, date=utils.get_date(),
gaf=gaf.name, obo=obo.name, index=index.name, synonyms=with_synonyms)
version=__version__, date=utils.get_date(), gaf=gaf.name, obo=obo.name, index=index.name, synonyms=with_synonyms
)

# The complete data
data = {
Expand Down Expand Up @@ -132,13 +137,11 @@ def make_index(obo, gaf, index, with_synonyms=False):
gaf = res.GAF_FILE
ind = Path("genescape.json.gz")


@utils.timer
def test_make_index():
    """Build the index from the surrounding obo/gaf/ind values and return the result.

    Synonyms are disabled for this run (with_synonyms=False).
    """
    return make_index(obo=obo, gaf=gaf, index=ind, with_synonyms=False)


@utils.timer
def test_load_json():
retval = resources.get_json(ind)
Expand All @@ -147,8 +150,5 @@ def test_load_json():
vals = test_make_index()
obj = test_load_json()

#data = obj[utils.IDX_SYM2GO]
#print (json.dumps(data, indent=4))



# data = obj[utils.IDX_SYM2GO]
# print (json.dumps(data, indent=4))
46 changes: 35 additions & 11 deletions src/genescape/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import sys
from pathlib import Path

import click

from genescape import utils, resources
from genescape import resources, utils


@click.group()
Expand All @@ -13,16 +14,29 @@ def cli():
"""
pass


ROOT_CHOICES = [utils.NS_BP, utils.NS_MF, utils.NS_CC, utils.NS_ALL]


@cli.command()
@click.argument("fname", default=None, required=False)
@click.option("-o", "--out", "out", metavar="TEXT", default="genescape.pdf", help="output graph file")
@click.option("-i", "--index", "index", metavar="FILE", help="OBO index file", )
@click.option(
"-i",
"--index",
"index",
metavar="FILE",
help="OBO index file",
)
@click.option("-m", "--match", "match", metavar="REGEX", default='', help="Regular expression match on function")
@click.option("-c", "--count", "count", metavar="INT", default=1, type=int, help="The minimal count for a GO term (1)")
@click.option('-r', '--root', type=click.Choice(ROOT_CHOICES, case_sensitive=False), default=utils.NS_ALL, help='Select a category: BP, MF, CC, or ALL.')
@click.option(
'-r',
'--root',
type=click.Choice(ROOT_CHOICES, case_sensitive=False),
default=utils.NS_ALL,
help='Select a category: BP, MF, CC, or ALL.',
)
@click.option("-t", "--test", "test", is_flag=True, help="run with demo data")
@click.option("-v", "verbose", is_flag=True, help="verbose output")
@click.help_option("-h", "--help")
Expand Down Expand Up @@ -64,7 +78,13 @@ def tree(fname, out=None, index=None, root=utils.NS_ALL, match=None, count=1, ve
@click.option("-m", "--match", "match", metavar="REGEX", default='', help="Regular expression match on function")
@click.option("-c", "--count", "count", metavar="INT", default=1, type=int, help="The minimal count for a GO term (1)")
@click.option("-t", "--test", "test", is_flag=True, help="Run with test data")
@click.option('-r', '--root', type=click.Choice(ROOT_CHOICES, case_sensitive=False), default=utils.NS_ALL, help='Select a category: BP, MF, CC, or ALL.')
@click.option(
'-r',
'--root',
type=click.Choice(ROOT_CHOICES, case_sensitive=False),
default=utils.NS_ALL,
help='Select a category: BP, MF, CC, or ALL.',
)
@click.option("--csv", "csvout", is_flag=True, help="Produce CSV output instead of JSON")
@click.option("-v", "verbose", is_flag=True, help="Verbose output.")
@click.help_option("-h", "--help")
Expand Down Expand Up @@ -106,17 +126,18 @@ def annotate(fname, index=None, root=utils.NS_ALL, verbose=False, test=False, cs
if not out:
print(text)
else:
with open(out, "wt") as fp:
with open(out, "w") as fp:
fp.write(text)


@cli.command()
@click.option("-b", "--obo", "obo", help="Input OBO file (go-basic.obo)")
@click.option("-g", "--gaf", "gaf", help="Input GAF file (goa_human.gaf.gz)")
@click.option("-i", "--index", "index", default="genescape.json.gz", help="Output index file (genescape.json.gz)")
@click.option("-s", "--synonms", "synon", is_flag=True, help="Include synonyms in the index")
@click.option("-d", "--dump", "dump", is_flag=True, help="Print the index to stdout")
@click.option("-s", "--synonms", "synon", is_flag=True, help="Include synonyms in the index")
@click.option("-d", "--dump", "dump", is_flag=True, help="Print the index to stdout")
@click.help_option("-h", "--help")
def build(index=None, obo=None, gaf=None, synon=False, dump=False ):
def build(index=None, obo=None, gaf=None, synon=False, dump=False):
"""
Builds a JSON index file from an OBO file.
"""
Expand All @@ -135,26 +156,29 @@ def build(index=None, obo=None, gaf=None, synon=False, dump=False ):
ind = Path(index)

if dump:

@utils.timer
def load_index():
    """Load the JSON index referenced by the enclosing ``ind`` path and return it."""
    return resources.get_json(ind)

obj = load_index()
meta = obj[utils.IDX_META_DATA]
print (f"# {meta}")
print(f"# {meta}")
sym2go = obj[utils.IDX_SYM2GO]
for key, value in sym2go.items():
row = [ key] + value
row = [key] + value
print("\t".join(row))

else:
# Run the build command.
build.make_index(obo=obo, gaf=gaf, index=ind, with_synonyms=synon)


@cli.command()
@click.option("--devmode", "devmode", is_flag=True, help="run in development mode")
@click.option("--reset", "reset", is_flag=True, help="reset the resources")
@click.option("-i", "--index", "index", help="Genescape index file")
@click.option("-i", "--index", "index", help="Genescape index file")
@click.option("-v", "verbose", is_flag=True, help="Verbose output.")
@click.help_option("-h", "--help")
def web(devmode=False, reset=False, verbose=False, index=None):
Expand Down
Loading

0 comments on commit 0413e84

Please sign in to comment.