From c572f4265dffaf8368f668397485302fd1c98643 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Sep 2024 18:42:28 +0000 Subject: [PATCH 1/3] release 0.4.27 --- HISTORY.md | 6 +++ mirtop/gff/__init__.py | 2 +- mirtop/gff/convert.py | 101 ++++++++++++++++++++-------------------- mirtop/libs/parse.py | 2 + mirtop/mirna/realign.py | 4 +- setup.py | 3 +- 6 files changed, 63 insertions(+), 55 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 5936577..831218d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,9 @@ +0.4.27 + +* fix random order in Variant field [#84](https://github.com/miRTop/mirtop/issues/83) +* fix possible duplication of lines [#80](https://github.com/miRTop/mirtop/issues/80) +* accept prefix for gff output [#84](https://github.com/miRTop/mirtop/issues/84) + 0.4.26 * Support spaces and special characters in bam files diff --git a/mirtop/gff/__init__.py b/mirtop/gff/__init__.py index dea6b93..71110a4 100644 --- a/mirtop/gff/__init__.py +++ b/mirtop/gff/__init__.py @@ -68,7 +68,7 @@ def reader(args): if args.low_memory: return None merged = merge.merge(out_dts, samples) - fn_merged_out = op.join(args.out, "mirtop.%s" % args.out_format) + fn_merged_out = op.join(args.out, "%s.%s" % (args.prefix, args.out_format)) _write(merged, header.create(samples, database, header.make_tools([args.format])), fn_merged_out, args) diff --git a/mirtop/gff/convert.py b/mirtop/gff/convert.py index 56f6537..e866c74 100644 --- a/mirtop/gff/convert.py +++ b/mirtop/gff/convert.py @@ -3,6 +3,7 @@ from __future__ import print_function import os.path as op +import pandas as pd from mirtop.mirna import fasta, mapper from mirtop.mirna.realign import read_id @@ -25,69 +26,69 @@ def convert_gff_counts(args): UID miRNA Variant Sample1 Sample2 ... Sample N """ sep = "\t" - variant_header = sep.join(['iso_5p', 'iso_3p', - 'iso_add3p', 'iso_snp']) + variant_header = ['iso_5p', 'iso_3p', + 'iso_add3p', 'iso_snp'] if args.add_extra: precursors = fasta.read_precursor(args.hairpin, args.sps) matures = mapper.read_gtf_to_precursor(args.gtf) - variant_header = sep.join([variant_header, - 'iso_5p_nt', 'iso_3p_nt', - 'iso_add3p_nt', 'iso_snp_nt']) + variant_header = variant_header + ['iso_5p_nt', 'iso_3p_nt', 'iso_add3p_nt', 'iso_snp_nt'] logger.info("INFO Reading GFF file %s", args.gff) logger.info("INFO Writing TSV file to directory %s", args.out) gff_file = open(args.gff, 'r') out_file = op.join(args.out, "%s.tsv" % op.splitext(op.basename(args.gff))[0]) + all_lines = [] missing_parent = 0 missing_mirna = 0 unvalid_uid = 0 - with open(out_file, 'w') as outh: - - for samples_line in gff_file: - if samples_line.startswith("## COLDATA:"): - samples = sep.join(samples_line.strip().split("COLDATA:")[1].strip().split(",")) - header = sep.join(['UID', 'Read', 'miRNA', 'Variant', - variant_header, samples]) - print(header, file=outh) - break - - for mirna_line in gff_file: - gff = feature(mirna_line) - attr = gff.attributes - UID = attr["UID"] - Read = attr["Read"] - mirna = attr["Name"] - parent = attr["Parent"] - variant = attr["Variant"] - try: - read_id(UID) - except KeyError: - unvalid_uid += 1 + #with open(out_file, 'w') as outh: + + for samples_line in gff_file: + if samples_line.startswith("## COLDATA:"): + samples = [sep.join(samples_line.strip().split("COLDATA:")[1].strip().split(","))] + #header = sep.join(['UID', 'Read', 'miRNA', 'Variant', + # variant_header, samples]) + #print(header, file=outh) + break + + for mirna_line in gff_file: + gff = feature(mirna_line) + attr = gff.attributes + UID = attr["UID"] + Read = attr["Read"] + mirna = attr["Name"] + parent = attr["Parent"] + variant = attr["Variant"] + try: + read_id(UID) + except KeyError: + unvalid_uid += 1 + continue + + expression = [sep.join(attr["Expression"].strip().split(","))] + cols_variants = _expand(variant) + logger.debug("COUNTS::Read:%s" % Read) + logger.debug("COUNTS::EXTRA:%s" % variant) + if args.add_extra: + if parent not in precursors: + missing_parent += 1 continue - - expression = sep.join(attr["Expression"].strip().split(",")) - cols_variants = sep.join(_expand(variant)) - logger.debug("COUNTS::Read:%s" % Read) - logger.debug("COUNTS::EXTRA:%s" % variant) - if args.add_extra: - if parent not in precursors: - missing_parent += 1 - continue - if mirna not in matures[parent]: - missing_mirna += 1 - continue - extra = variant_with_nt(mirna_line, precursors, matures) - if extra == "Invalid": - continue - logger.debug("COUNTS::EXTRA:%s" % extra) - cols_variants = sep.join([cols_variants] + _expand(extra, True)) - summary = sep.join([UID, Read, mirna, variant, - cols_variants, expression]) - logger.debug(summary) - print(summary, file=outh) - - gff_file.close() + if mirna not in matures[parent]: + missing_mirna += 1 + continue + extra = variant_with_nt(mirna_line, precursors, matures) + if extra == "Invalid": + continue + logger.debug("COUNTS::EXTRA:%s" % extra) + cols_variants = [cols_variants] + _expand(extra, True) + #import pdb; pdb.set_trace() + summary = [UID, Read, mirna, variant] + cols_variants + expression + logger.debug(summary) + all_lines.append(summary) + df = pd.DataFrame(all_lines, columns = ['UID', 'Read', 'miRNA', 'Variant'] + variant_header + samples) + df = df.drop_duplicates() + df.to_csv(out_file, sep="\t", index=False) logger.info("Missing Parents in hairpin file: %s" % missing_parent) logger.info("Missing MiRNAs in GFF file: %s" % missing_mirna) logger.info("Non valid UID: %s" % unvalid_uid) diff --git a/mirtop/libs/parse.py b/mirtop/libs/parse.py index e21b91e..6a638b8 100644 --- a/mirtop/libs/parse.py +++ b/mirtop/libs/parse.py @@ -82,6 +82,8 @@ def _add_subparser_gff(subparsers): parser.add_argument("files", nargs="*", help="Bam files.") parser.add_argument("-o", "--out", dest="out", required=1, help="dir of output files") + parser.add_argument("--prefix", dest="prefix", required=0, + default="mirtop", help="prefix for output file") parser.add_argument("--sps", help="species") parser.add_argument("--keep-name", action="store_true", diff --git a/mirtop/mirna/realign.py b/mirtop/mirna/realign.py index f2bad95..3d409b1 100644 --- a/mirtop/mirna/realign.py +++ b/mirtop/mirna/realign.py @@ -1,5 +1,5 @@ import re -from Bio import pairwise2 +from Bio.Align import PairwiseAligner from Bio.Seq import Seq from collections import defaultdict @@ -94,7 +94,7 @@ def formatGFF(self): value.append("iso_3p:%s%s" % (direction, size)) if not value: value = ["NA"] - return ",".join(list(set(value))) + return ",".join(sorted(list(set(value)))) def format(self, sep="\t"): """Create tabular line from variant fields.""" diff --git a/setup.py b/setup.py index 219b825..ed944fb 100644 --- a/setup.py +++ b/setup.py @@ -3,8 +3,7 @@ import os from setuptools import setup, find_packages -version = '0.4.26' - +version = '0.4.27' url = 'http://github.com/mirtop/mirtop' From c2a56267f8597da3e61d82e38e886cddb3629268 Mon Sep 17 00:00:00 2001 From: Lorena Pantano Date: Fri, 13 Sep 2024 19:20:36 +0000 Subject: [PATCH 2/3] add publishing ci --- .github/workflows/ci-cd.yml | 94 +++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 .github/workflows/ci-cd.yml diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml new file mode 100644 index 0000000..9feca61 --- /dev/null +++ b/.github/workflows/ci-cd.yml @@ -0,0 +1,94 @@ +name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI + +on: push + +jobs: + build: + name: Build distribution 📦 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python 🐍 distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/mirtop # Replace with your PyPI project name + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + github-release: + name: >- + Sign the Python 🐍 distribution 📦 with Sigstore + and upload them to GitHub Release + needs: + - publish-to-pypi + runs-on: ubuntu-latest + + permissions: + contents: write # IMPORTANT: mandatory for making GitHub Releases + id-token: write # IMPORTANT: mandatory for sigstore + + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Sign the dists with Sigstore + uses: sigstore/gh-action-sigstore-python@v2.1.1 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + run: >- + gh release create + '${{ github.ref_name }}' + --repo '${{ github.repository }}' + --notes "" + - name: Upload artifact signatures to GitHub Release + env: + GITHUB_TOKEN: ${{ github.token }} + # Upload to GitHub Release using the `gh` CLI. + # `dist/` contains the built packages, and the + # sigstore-produced signatures and certificates. + run: >- + gh release upload + '${{ github.ref_name }}' dist/** + --repo '${{ github.repository }}' + From 8aeb7a52ac12c9a6e281f9184a34e36498addead Mon Sep 17 00:00:00 2001 From: Lorena Pantano Date: Fri, 13 Sep 2024 20:30:39 +0000 Subject: [PATCH 3/3] passing tests --- mirtop/gff/convert.py | 4 +- mirtop/gff/stats.py | 8 +- mirtop/mirna/realign.py | 4 +- test/test_automated_analysis.py | 489 ++++++++++++++++---------------- test/test_functions.py | 74 ++--- 5 files changed, 290 insertions(+), 289 deletions(-) diff --git a/mirtop/gff/convert.py b/mirtop/gff/convert.py index e866c74..db33bfd 100644 --- a/mirtop/gff/convert.py +++ b/mirtop/gff/convert.py @@ -81,11 +81,11 @@ def convert_gff_counts(args): if extra == "Invalid": continue logger.debug("COUNTS::EXTRA:%s" % extra) - cols_variants = [cols_variants] + _expand(extra, True) - #import pdb; pdb.set_trace() + cols_variants = cols_variants + _expand(extra, True) summary = [UID, Read, mirna, variant] + cols_variants + expression logger.debug(summary) all_lines.append(summary) + #import pdb; pdb.set_trace() df = pd.DataFrame(all_lines, columns = ['UID', 'Read', 'miRNA', 'Variant'] + variant_header + samples) df = df.drop_duplicates() df.to_csv(out_file, sep="\t", index=False) diff --git a/mirtop/gff/stats.py b/mirtop/gff/stats.py index d1d3c94..9f9f1bb 100644 --- a/mirtop/gff/stats.py +++ b/mirtop/gff/stats.py @@ -107,13 +107,13 @@ def _add_missing(df): # ref_miRNA_mean category = "ref_miRNA_mean" if sum(df['category']==category) == 0: - df2 = {'category': category, 'sample': df['sample'].iat[0], 'counts': 0} - df = df.append(df2, ignore_index = True) + df2 = pd.DataFrame({'category': category, 'sample': df['sample'].iat[0], 'counts': 0}, index=[0]) + df = pd.concat([df, df2], ignore_index = True) category = "isomiR_sum" if sum(df['category']==category) == 0: - df2 = {'category': category, 'sample': df['sample'].iat[0], 'counts': 0} - df = df.append(df2, ignore_index = True) + df2 = pd.DataFrame({'category': category, 'sample': df['sample'].iat[0], 'counts': 0}, index=[0]) + df = pd.concat([df, df2], ignore_index = True) return df diff --git a/mirtop/mirna/realign.py b/mirtop/mirna/realign.py index 3d409b1..f2bad95 100644 --- a/mirtop/mirna/realign.py +++ b/mirtop/mirna/realign.py @@ -1,5 +1,5 @@ import re -from Bio.Align import PairwiseAligner +from Bio import pairwise2 from Bio.Seq import Seq from collections import defaultdict @@ -94,7 +94,7 @@ def formatGFF(self): value.append("iso_3p:%s%s" % (direction, size)) if not value: value = ["NA"] - return ",".join(sorted(list(set(value)))) + return ",".join(list(set(value))) def format(self, sep="\t"): """Create tabular line from variant fields.""" diff --git a/test/test_automated_analysis.py b/test/test_automated_analysis.py index 18cfc82..60f8890 100644 --- a/test/test_automated_analysis.py +++ b/test/test_automated_analysis.py @@ -10,8 +10,9 @@ import contextlib import functools -from nose import SkipTest -from nose.plugins.attrib import attr +# from nose import SkipTest +import pytest +#from nose.plugins.attrib import attr @contextlib.contextmanager @@ -39,7 +40,7 @@ def inner(*args, **kwargs): try: test(*args, **kwargs) except Exception: - raise SkipTest + raise pytest.skip else: raise AssertionError('Failure expected') return inner @@ -66,7 +67,7 @@ def _download_to_dir(self, url, dirname): shutil.move(os.path.basename(dirname), dirname) os.remove(os.path.basename(url)) - @attr(simulate=True) + ##@attr(simulate=True) def test_simulate(self): """Check simulated data""" mirna = "TGAGGTAGTAGGTTGTATAGTT" @@ -102,10 +103,10 @@ def test_simulate(self): n += 1 print("rate %s/%s" % (correct, n)) - @attr(complete=True) - @attr(annotate=True) - @attr(bam=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(annotate=True) + ##@attr(bam=True) + ##@attr(cmd=True) def test_srnaseq_annotation_bam(self): """Run miraligner analysis """ @@ -121,9 +122,9 @@ def test_srnaseq_annotation_bam(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(low_memory=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(low_memory=True) + ##@attr(cmd=True) def test_srnaseq_annotation_bam_chunk(self): """Run miraligner analysis """ @@ -139,9 +140,9 @@ def test_srnaseq_annotation_bam_chunk(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(cmd_bam_genomic=True) - @attr(complete=True) - @attr(cmd=True) + ##@attr(cmd_bam_genomic=True) + ##@attr(complete=True) + ##@attr(cmd=True) def test_srnaseq_annotation_genomic_bam(self): """Run genomic bam analysis """ @@ -157,9 +158,9 @@ def test_srnaseq_annotation_genomic_bam(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(cmd_bam_genomic_low_memory=True) - @attr(complete=True) - @attr(cmd=True) + ##@attr(cmd_bam_genomic_low_memory=True) + ##@attr(complete=True) + ##@attr(cmd=True) def test_srnaseq_annotation_genomic_bam_low_memory(self): """Run genomic bam analysis """ @@ -175,127 +176,127 @@ def test_srnaseq_annotation_genomic_bam_low_memory(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_seqbuster=True) - @attr(cmd=True) - def test_srnaseq_annotation_seqbuster(self): - """Run miraligner analysis - """ - with make_workdir(): - clcode = ["mirtop", - "gff", - "--format", "seqbuster", - "--sps", "hsa", - "--hairpin", "../../data/examples/annotate/hairpin.fa", - "--gtf", "../../data/examples/annotate/hsa.gff3", - "-o", "test_out_mirs", - "../../data/examples/seqbuster/reads.mirna"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_seqbuster_low_memory=True) - @attr(cmd=True) - def test_srnaseq_annotation_seqbuster_low_memory(self): - """Run miraligner analysis - """ - with make_workdir(): - clcode = ["mirtop", - "gff", "--low-memory", - "--format", "seqbuster", - "--sps", "hsa", - "--hairpin", "../../data/examples/annotate/hairpin.fa", - "--gtf", "../../data/examples/annotate/hsa.gff3", - "-o", "test_out_mirs", - "../../data/examples/seqbuster/reads.mirna"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_isomirsea=True) - @attr(cmd=True) - def test_srnaseq_annotation_isomirsea(self): - """Run isomirsea analysis - """ - with make_workdir(): - clcode = ["mirtop", - "gff", - "--format", "isomirsea", - "--sps", "hsa", - "--hairpin", "../../data/examples/annotate/hairpin.fa", - "--gtf", "../../data/examples/annotate/hsa.gff3", - "-o", "test_out_mirs", - "../../data/examples/isomir-sea/tagMir-all.gff", - "-d", "-vd"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_srnabench=True) - @attr(cmd=True) - def test_srnaseq_annotation_srnabench(self): - """Run srnabench analysis - """ - with make_workdir(): - clcode = ["mirtop", - "gff", - "--format", "srnabench", - "--sps", "hsa", - "--hairpin", "../../data/examples/annotate/hairpin.fa", - "--gtf", "../../data/examples/annotate/hsa.gff3", - "-o", "test_out_mirs", - "../../data/examples/srnabench/", - "-d", "-vd"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_optimir=True) - @attr(cmd=True) - def test_srnaseq_annotation_optimir(self): - """Run optimir analysis - """ - with make_workdir(): - clcode = ["mirtop", - "gff", - "--format", "optimir", - "--sps", "hsa", - "--hairpin", "../../data/examples/annotate/hairpin.fa", - "--gtf", "../../data/examples/annotate/hsa.gff3", - "-o", "test_out_mirs", - "../../data/examples/optimir/synthetic_100_full.gff3", - "-d", "-vd"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_manatee=True) - @attr(cmd=True) - def test_srnaseq_annotation_manatee(self): - """Run Manatee analysis - """ - with make_workdir(): - clcode = ["mirtop", - "gff", - "--format", "manatee", - "--sps", "hsa", - "--hairpin", "../../data/examples/annotate/hairpin.fa", - "--gtf", "../../data/examples/annotate/hsa.gff3", - "-o", "test_out_mirs", - "../../data/examples/manatee/simulated.sam", - "-d", "-vd"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_stats=True) - @attr(cmd=True) + # ###@attr(complete=True) + # ###@attr(cmd_seqbuster=True) + # ####@attr(cmd=True) + # def test_srnaseq_annotation_seqbuster(self): + # """Run miraligner analysis + # """ + # with make_workdir(): + # clcode = ["mirtop", + # "gff", + # "--format", "seqbuster", + # "--sps", "hsa", + # "--hairpin", "../../data/examples/annotate/hairpin.fa", + # "--gtf", "../../data/examples/annotate/hsa.gff3", + # "-o", "test_out_mirs", + # "../../data/examples/seqbuster/reads.mirna"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + # ###@attr(complete=True) + # ###@attr(cmd_seqbuster_low_memory=True) + # ###@attr(cmd=True) + # def test_srnaseq_annotation_seqbuster_low_memory(self): + # """Run miraligner analysis + # """ + # with make_workdir(): + # clcode = ["mirtop", + # "gff", "--low-memory", + # "--format", "seqbuster", + # "--sps", "hsa", + # "--hairpin", "../../data/examples/annotate/hairpin.fa", + # "--gtf", "../../data/examples/annotate/hsa.gff3", + # "-o", "test_out_mirs", + # "../../data/examples/seqbuster/reads.mirna"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + # ##@attr(complete=True) + # ##@attr(cmd_isomirsea=True) + # ##@attr(cmd=True) + # def test_srnaseq_annotation_isomirsea(self): + # """Run isomirsea analysis + # """ + # with make_workdir(): + # clcode = ["mirtop", + # "gff", + # "--format", "isomirsea", + # "--sps", "hsa", + # "--hairpin", "../../data/examples/annotate/hairpin.fa", + # "--gtf", "../../data/examples/annotate/hsa.gff3", + # "-o", "test_out_mirs", + # "../../data/examples/isomir-sea/tagMir-all.gff", + # "-d", "-vd"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + # ##@attr(complete=True) + # ##@attr(cmd_srnabench=True) + # ##@attr(cmd=True) + # def test_srnaseq_annotation_srnabench(self): + # """Run srnabench analysis + # """ + # with make_workdir(): + # clcode = ["mirtop", + # "gff", + # "--format", "srnabench", + # "--sps", "hsa", + # "--hairpin", "../../data/examples/annotate/hairpin.fa", + # "--gtf", "../../data/examples/annotate/hsa.gff3", + # "-o", "test_out_mirs", + # "../../data/examples/srnabench/", + # "-d", "-vd"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + # ##@attr(complete=True) + # ##@attr(cmd_optimir=True) + # ##@attr(cmd=True) + # def test_srnaseq_annotation_optimir(self): + # """Run optimir analysis + # """ + # with make_workdir(): + # clcode = ["mirtop", + # "gff", + # "--format", "optimir", + # "--sps", "hsa", + # "--hairpin", "../../data/examples/annotate/hairpin.fa", + # "--gtf", "../../data/examples/annotate/hsa.gff3", + # "-o", "test_out_mirs", + # "../../data/examples/optimir/synthetic_100_full.gff3", + # "-d", "-vd"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + # ##@attr(complete=True) + # ##@attr(cmd_manatee=True) + # ##@attr(cmd=True) + # def test_srnaseq_annotation_manatee(self): + # """Run Manatee analysis + # """ + # with make_workdir(): + # clcode = ["mirtop", + # "gff", + # "--format", "manatee", + # "--sps", "hsa", + # "--hairpin", "../../data/examples/annotate/hairpin.fa", + # "--gtf", "../../data/examples/annotate/hsa.gff3", + # "-o", "test_out_mirs", + # "../../data/examples/manatee/simulated.sam", + # "-d", "-vd"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + ##@attr(complete=True) + ##@attr(cmd_stats=True) + ##@attr(cmd=True) def test_srnaseq_stats(self): """Run stats analysis """ @@ -312,9 +313,9 @@ def test_srnaseq_stats(self): if sum(1 for line in open('test_out_mirs/mirtop_stats.txt')) == 1: raise ValueError("File is empty, something is wrong with stats cmd.") - @attr(complete=True) - @attr(cmd_merge=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_merge=True) + ##@attr(cmd=True) def test_merge_bam(self): """ Run collapse two samples @@ -332,9 +333,9 @@ def test_merge_bam(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_export_seqbuster=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_export_seqbuster=True) + ##@attr(cmd=True) def test_export_seqbuster(self): """ Run SEQBUSTER export command @@ -350,9 +351,9 @@ def test_export_seqbuster(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_export_vcf=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_export_vcf=True) + ##@attr(cmd=True) def test_export_vcf(self): """ Run VCF export command @@ -370,9 +371,9 @@ def test_export_vcf(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_export_fasta=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_export_fasta=True) + ##@attr(cmd=True) def test_export_fasta(self): """ Run FASTA export command @@ -390,9 +391,9 @@ def test_export_fasta(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_count=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_count=True) + ##@attr(cmd=True) def test_count(self): """ Run count command @@ -408,49 +409,49 @@ def test_count(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_spikeins=True) - @attr(cmd=True) - def test_spikeins_cmd(self): - """Run spikeins analysis - """ - import platform - with make_workdir(): - shutil.copy("../../data/examples/spikeins/spikeins.fa", - "spikeins.fa") - clcode = ["mirtop", - "spikein", - "spikeins.fa", - "-o", - "test_out_spikeins"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - if platform.system() == "Linux": - clcode = ["razers3", "-dr", "0", "-i", "80", "-rr", "90", - "-f", "-o", "spikeins.sam", - "test_out_spikeins/spikeins_pre.fasta", - "../../data/examples/spikeins/test-spikeins.fa"] - print(" ".join(clcode)) - subprocess.check_call(clcode) - else: - shutil.copy("../../data/examples/spikeins/spikeins.sam", - "spikeins.sam") - clcode = ["mirtop", - "gff", - "--add-extra", - "--hairpin", "test_out_spikeins/spikeins_pre.fasta", - "--gtf", "test_out_spikeins/spikeins_pre.gff", - "-o", "test_out_mirs", - "spikeins.sam"] - print("") - print(" ".join(clcode)) - subprocess.check_call(clcode) - - @attr(complete=True) - @attr(cmd_update=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_spikeins=True) + ##@attr(cmd=True) + # def test_spikeins_cmd(self): + # """Run spikeins analysis + # """ + # import platform + # with make_workdir(): + # shutil.copy("../../data/examples/spikeins/spikeins.fa", + # "spikeins.fa") + # clcode = ["mirtop", + # "spikein", + # "spikeins.fa", + # "-o", + # "test_out_spikeins"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + # if platform.system() == "Linux": + # clcode = ["razers3", "-dr", "0", "-i", "80", "-rr", "90", + # "-f", "-o", "spikeins.sam", + # "test_out_spikeins/spikeins_pre.fasta", + # "../../data/examples/spikeins/test-spikeins.fa"] + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + # else: + # shutil.copy("../../data/examples/spikeins/spikeins.sam", + # "spikeins.sam") + # clcode = ["mirtop", + # "gff", + # "--add-extra", + # "--hairpin", "test_out_spikeins/spikeins_pre.fasta", + # "--gtf", "test_out_spikeins/spikeins_pre.gff", + # "-o", "test_out_mirs", + # "spikeins.sam"] + # print("") + # print(" ".join(clcode)) + # subprocess.check_call(clcode) + + ##@attr(complete=True) + ##@attr(cmd_update=True) + ##@attr(cmd=True) def test_update_cmd(self): """Run update analysis """ @@ -463,9 +464,9 @@ def test_update_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_validate_cmd(self): """Run update analysis """ @@ -478,9 +479,9 @@ def test_validate_cmd(self): subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_create_1_cmd(self): """Run sql command to incorporate GFF to SQLite """ @@ -499,9 +500,9 @@ def test_sql_create_1_cmd(self): subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_create_2_cmd(self): """Run sql command to incorporate GFF to SQLite """ @@ -517,9 +518,9 @@ def test_sql_create_2_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_showTables_cmd(self): """Run sql command to query from a database to show tables using SQLite """ @@ -535,9 +536,9 @@ def test_sql_query_showTables_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_showSchema_cmd(self): """Run sql command to query from a database to show schema using SQLite """ @@ -555,9 +556,9 @@ def test_sql_query_showSchema_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_showColumns_cmd(self): """Run sql command to query from a database to show columns using SQLite """ @@ -573,9 +574,9 @@ def test_sql_query_showColumns_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_descSummary_cmd(self): """Run sql command to query from a database to display the header of the GFF using SQLite """ @@ -591,9 +592,9 @@ def test_sql_query_descSummary_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_statIsomirs_cmd(self): """Run sql command to query from a database to summarize isomirs per miRNA """ @@ -611,9 +612,9 @@ def test_sql_query_statIsomirs_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_statIsomirsFile_cmd(self): """Run sql command to query from a database to summarize isomirs per miRNA reading from afile """ @@ -631,9 +632,9 @@ def test_sql_query_statIsomirsFile_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectLimit_cmd(self): """Run sql command to query from database using limit option """ @@ -651,9 +652,9 @@ def test_sql_query_SelectLimit_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectColumns_cmd(self): """Run sql command to query from database using limit option """ @@ -673,9 +674,9 @@ def test_sql_query_SelectColumns_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectMirna_cmd(self): """Run sql command to query from database for specific miRNAs """ @@ -697,9 +698,9 @@ def test_sql_query_SelectMirna_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectiVariant_cmd(self): """Run sql command to query from database for specific variant types """ @@ -719,9 +720,9 @@ def test_sql_query_SelectiVariant_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectFilter_cmd(self): """Run sql command to query from database using filters """ @@ -743,9 +744,9 @@ def test_sql_query_SelectFilter_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectCount_cmd(self): """Run sql command to query from database to fetch counts of the return values """ @@ -763,9 +764,9 @@ def test_sql_query_SelectCount_cmd(self): print(" ".join(clcode)) subprocess.check_call(clcode) - @attr(complete=True) - @attr(cmd_validate=True) - @attr(cmd=True) + ##@attr(complete=True) + ##@attr(cmd_validate=True) + ##@attr(cmd=True) def test_sql_query_SelectTextOut_cmd(self): """Run sql command to query from database and return the output to a text file """ diff --git a/test/test_functions.py b/test/test_functions.py index fb7f8f9..2ac4d6b 100644 --- a/test/test_functions.py +++ b/test/test_functions.py @@ -10,7 +10,7 @@ import contextlib import shutil -from nose.plugins.attrib import attr +#from nose.plugins.attrib import attr @contextlib.contextmanager @@ -64,7 +64,7 @@ def annotate(fn, read_file, load=False, create=True, keep_name=False, class FunctionsTest(unittest.TestCase): """Setup a full automated analysis and run the pipeline. """ - @attr(database=True) + #@pytest.mark.database def test_database(self): from mirtop.mirna import mapper args = argparse.Namespace() @@ -75,7 +75,7 @@ def test_database(self): if db != "miRBasev21": raise ValueError("%s not eq to miRBasev21" % db) - @attr(read_hairpin=True) + #@pytest.mark.read_hairpin def test_read_hairpin(self): from mirtop.mirna import mapper, fasta from mirtop.libs import logger @@ -96,7 +96,7 @@ def test_read_hairpin(self): # read data/aligments/let7-perfect.bam return True - @attr(read_hairpin_mirgenedb=True) + ##@attr(read_hairpin_mirgenedb=True) def test_read_hairpin_mirgenedb(self): from mirtop.mirna import mapper from mirtop.libs import logger @@ -105,7 +105,7 @@ def test_read_hairpin_mirgenedb(self): "data/db/mirgenedb/hsa.gff") print(map_mir) - @attr(read_mir2chr=True) + ##@attr(read_mir2chr=True) def test_read_mir2chr(self): from mirtop.mirna import mapper from mirtop.libs import logger @@ -114,7 +114,7 @@ def test_read_mir2chr(self): print(map_mir) # print(mapper.read_gtf_chr2mirna2("data/examples/annotate/hsa.gff3")) - @attr(read_mir2genomic=True) + ##@attr(read_mir2genomic=True) def test_read_mir2genomic(self): from mirtop.mirna import mapper from mirtop.libs import logger @@ -122,7 +122,7 @@ def test_read_mir2genomic(self): map_mir = mapper.read_gtf_to_mirna("data/examples/annotate/hsa.gff3") print(map_mir) - @attr(read_line=True) + ##@attr(read_line=True) def test_read_line(self): """Read GFF/GTF line""" from mirtop.gff.body import read_gff_line @@ -130,7 +130,7 @@ def test_read_line(self): for line in inh: print(read_gff_line(line)) - @attr(code=True) + ##@attr(code=True) def test_code(self): """testing code correction function""" from mirtop.mirna.realign import make_id, read_id @@ -153,7 +153,7 @@ def _convert(s, test, reverse=False): # if read_id("asD(-"): # raise ValueError("This should be False, Not valid code.") - @attr(code_convert=True) + ##@attr(code_convert=True) def test_code_convert(self): """testing code correction function""" from mirtop.mirna.realign import make_id @@ -164,7 +164,7 @@ def test_code_convert(self): if not make_id(read_uid_10("@#%$@2")) == "iso-13-B1NYDX": raise ValueError("Update ID is not working.") - @attr(cigar=True) + ##@attr(cigar=True) def test_cigar(self): """testing cigar correction function""" cigar = [[0, 14], [1, 1], [0, 5]] @@ -188,7 +188,7 @@ def test_cigar(self): raise ValueError("3MA3M not equal AAATCCC but %s" % cigar2snp("3MA3M", "AAATCCC")) - @attr(sequence=True) + ##@attr(sequence=True) def test_is_sequence(self): """testing if string is valid sequence""" from mirtop.mirna.realign import is_sequence @@ -197,7 +197,7 @@ def test_is_sequence(self): if is_sequence("AC2TGC"): raise ValueError("AC2TGC should return false.") - @attr(locala=True) + ##@attr(locala=True) def test_locala(self): """testing pairwise alignment""" from mirtop.mirna.realign import align @@ -209,7 +209,7 @@ def test_locala(self): print(align("TGANTAGTNGNTTGTATNGTT", "TGAGTATAGGCCTTGTATAGTT")[0]) print(align("NCANAGTCCAAGNTCATN", "TCATAGTCCAAGGTCATG")[0]) - @attr(reverse=True) + ##@attr(reverse=True) def test_reverse(self): """Test reverse complement function""" from mirtop.mirna.realign import reverse_complement @@ -218,7 +218,7 @@ def test_reverse(self): raise ValueError("ATGC complement is not: %s" % reverse_complement("ATGC")) - @attr(class_gff=True) + ##@attr(class_gff=True) def test_class(self): """Test class to read GFF line""" from mirtop.gff.classgff import feature @@ -231,7 +231,7 @@ def test_class(self): print(gff.columns) print(gff.attributes) - @attr(merge=True) + ##@attr(merge=True) def test_merge(self): """Test merge functions""" from mirtop.gff import merge @@ -250,7 +250,7 @@ def test_merge(self): if expression != "1,2": raise ValueError("This is wrong: %s" % expression) - @attr(align_mature=True) + ##@attr(align_mature=True) def test_variant(self): """testing get mature sequence""" from mirtop.mirna import fasta, mapper @@ -315,10 +315,10 @@ def test_variant(self): raise ValueError("Wrong alignment for test 8 %s" % res) - @attr(alignment=True) + ##@attr(alignment=True) def test_alignment(self): """testing alignments function""" - from mirtop.bam import bam + from mirtop import bam from mirtop.gff.classgff import feature fns = {"let7-last1D.sam": {56:"iso_add3p:1,iso_snv"}, "let7-1D.sam": {5:"iso_snv,iso_3p:-5"}, @@ -328,13 +328,13 @@ def test_alignment(self): "let7-triming.sam": {5:"iso_3p:+2",4:"iso_5p:-1",6:"iso_5p:+1,iso_3p:-3"}} #import pdb; pdb.set_trace() for fn in fns: - gff = annotate("data/aligments/%s" % fn, bam.read_bam) + gff = annotate("data/aligments/%s" % fn, bam.bam.read_bam) for pos in gff['hsa-let-7a-1']: f = feature(gff['hsa-let-7a-1'][pos][0][4]) if not set(f.attributes['Variant'].split(",")) == set(fns[fn][pos].split(",")): raise ValueError("Error in %s" % fn) - @attr(alignment_genomic=True) + ##@attr(alignment_genomic=True) def test_alignment_genomic(self): """testing alignments function""" from mirtop.bam import bam @@ -351,7 +351,7 @@ def test_alignment_genomic(self): bam.read_bam, gtf="data/db/mirbase/hsa.gff3", genomic=True)) - @attr(keep_name=True) + ##@attr(keep_name=True) def test_keep_name(self): from mirtop.bam import bam line = annotate("data/aligments/let7-perfect.sam", @@ -361,7 +361,7 @@ def test_keep_name(self): if line["hsa-let-7a-1"][5][0][4].find("seq_perfect_x2") < 0: raise ValueError("Keep name failed: %s" % line) - @attr(seqbuster=True) + ##@attr(seqbuster=True) def test_seqbuster(self): """testing reading seqbuster files function""" from mirtop.libs import logger @@ -375,7 +375,7 @@ def test_seqbuster(self): print("\nno frequency\n") annotate("data/examples/seqbuster/seqbuster_nofreq.mirna", seqbuster.read_file) - @attr(srnabench=True) + ##@attr(srnabench=True) def test_srnabench(self): """testing reading srnabench files function""" from mirtop.libs import logger @@ -384,7 +384,7 @@ def test_srnabench(self): from mirtop.importer import srnabench annotate("data/examples/srnabench", srnabench.read_file, create=False) - @attr(optimir=True) + ##@attr(optimir=True) def test_optimir(self): """testing reading optimir files function""" from mirtop.libs import logger @@ -393,7 +393,7 @@ def test_optimir(self): from mirtop.importer import optimir annotate("data/examples/optimir/synthetic_100_full.gff3", optimir.read_file, create=False) - @attr(prost=True) + ##@attr(prost=True) def test_prost(self): """testing reading prost files function""" from mirtop.libs import logger @@ -408,7 +408,7 @@ def test_prost(self): fn, precursors, "miRBasev21", "data/examples/annotate/hsa.gff3") annotate("data/example/prost/prost.example.txt", reads, True) - @attr(gff=True) + ##@attr(gff=True) def test_gff(self): """testing GFF function""" from mirtop.libs import logger @@ -419,7 +419,7 @@ def test_gff(self): annotate(bam_fn, bam.read_bam) return True - @attr(collapse=True) + ##@attr(collapse=True) def test_collapse(self): """testing GFF function""" from mirtop.libs import logger @@ -430,7 +430,7 @@ def test_collapse(self): annotate(bam_fn, bam.read_bam) return True - @attr(counts=True) + ##@attr(counts=True) def test_counts(self): """testing convert_gff_counts in convert.py function""" from mirtop.libs import logger @@ -452,7 +452,7 @@ def test_counts(self): return True - @attr(stats=True) + ##@attr(stats=True) def test_stats(self): """testing stats function""" from mirtop.gff import stats @@ -461,7 +461,7 @@ def test_stats(self): stats._dump_log(df, version, None) print(df) - @attr(variant=True) + ##@attr(variant=True) def test_string_variant(self): """testing parsing string variants""" from mirtop.gff import body @@ -477,7 +477,7 @@ def test_string_variant(self): if (truthv > list(gff.values())) - (list(gff.values()) > truthv): raise ValueError("Not found expected Values.") - @attr(validate=True) + ##@attr(validate=True) def test_validator(self): """test validator functions""" from mirtop.gff.validator import _check_file @@ -493,7 +493,7 @@ def test_validator(self): raise ValueError("Validator did catch an unexpected error in correct_file.gff.") - @attr(spikeins=True) + ###@attr(spikeins=True) def test_spikeins(self): """Test spikeins reading and annotation""" from mirtop.libs import spikeins @@ -519,19 +519,19 @@ def test_spikeins(self): fasta_precursor = fasta.read_precursor(file_fasta, None) print(fasta_precursor) - @attr(export_fasta=True) + #@attr(export_fasta=True) def test_export_fasta(self): from mirtop.exporter.fasta import _process print("\n") _process("data/examples/gff/2samples.gff", None) - @attr(update=True) + #@attr(update=True) def test_update(self): from mirtop.gff.update import update_file print("\n") update_file("data/examples/versions/version1.0.gff", None) - @attr(sql=True) + #@attr(sql=True) def test_sql(self): """testing mirtop_sql in sql.py function""" from mirtop.libs import logger @@ -548,13 +548,13 @@ def test_sql(self): os.remove(os.path.join(args.out, "SQL_sample.db")) return True - @attr(issue64=True) + #@attr(issue64=True) def test_issue64(self): from mirtop.bam.filter import tune subs, add, cigar = tune("TATCACAGTGGCTGTTCTTTTTT", "CCCCCTATCACAGTGGCTGTTCTTTTTT", 5, None) if add: raise ValueError("Bad annotation in for seqs with 6T/As at the end") - @attr(error69=True) + #@attr(error69=True) def test_error69(self): from mirtop.bam.filter import tune v = tune("CTTATCAGATTGTATTGTAATT",