diff --git a/pyradar/pyradar.h b/pyradar/pyradar.h index ee0c753..aa6503b 100644 --- a/pyradar/pyradar.h +++ b/pyradar/pyradar.h @@ -5,5 +5,10 @@ * * */ -int radar_run_from_files( const char * seq, const char * mali, const char * lfasta1, const char * lfasta2 ); -void radar_setLogLevel( int f); +int radar_run_from_files(const char * seq, + const char * mali, + const char * lfasta1, + const char * lfasta2, + int random_seed); + +void radar_setLogLevel(int f); diff --git a/pyradar/pyradar.pyx b/pyradar/pyradar.pyx index fc4c42e..d860bab 100644 --- a/pyradar/pyradar.pyx +++ b/pyradar/pyradar.pyx @@ -3,11 +3,15 @@ Pyrex extension classes used by 'radar.py'. """ cdef extern from "pyradar.h": - int radar_run_from_files( char *, char *, char *, char * ) - void radar_setLogLevel( int ) + int radar_run_from_files(char *, char *, char *, char *, unsigned int) + void radar_setLogLevel(int) -def run_from_files( filename_sequence, filename_mali, filename_lfasta, filename_lfasta2 ): - return radar_run_from_files( filename_sequence, filename_mali, filename_lfasta, filename_lfasta2 ) +def run_from_files(filename_sequence, filename_mali, + filename_lfasta, filename_lfasta2, + random_seed): + return radar_run_from_files(filename_sequence, filename_mali, + filename_lfasta, filename_lfasta2, + random_seed) def setLogLevel(v): """set the logging level.""" diff --git a/scripts/radar.py b/scripts/radar.py index 6edb713..3ec86bd 100644 --- a/scripts/radar.py +++ b/scripts/radar.py @@ -2,102 +2,99 @@ #### #### ## -## Project PairsDB +# Project PairsDB ## -## Copyright (C) 2002 Andreas Heger All rights reserved +# Copyright (C) 2002 Andreas Heger All rights reserved ## -## Author: Andreas Heger +# Author: Andreas Heger ## -## $Id: WrapperRadar.py,v 1.1.1.1 2002/07/02 10:46:58 heger Exp $ +# $Id: WrapperRadar.py,v 1.1.1.1 2002/07/02 10:46:58 heger Exp $ ## ## #### #### -USAGE="""radar.py [OPTIONS] filenames + +import gzip +import logging +import optparse +import os +import pyradar +import re +import shutil +import string +import subprocess +import sys +import tempfile + +USAGE = """radar.py [OPTIONS] filenames run radar on one or more fasta-formatted files. """ -import re, sys, os, string, tempfile, optparse, shutil, types, subprocess - rx_repeat = re.compile("^\s*(\d+)\-\s*(\d+)\s+\(\s*(\S+)\/\s*(\S+)\)\s+(\S+)") -##---------------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------------- DEBUG = 0 -import pyradar class RadarRepeat: - def __init__( self, file): + + def __init__(self, file): """Build members from a file.""" - ## read up to header + # read up to header while 1: line = file.readline() - if not line: break - - if line[0:3] == "No.": break + if not line: + break - ## read header + if line[0:3] == "No.": + break + + # read header line = file.readline() - data = string.split( line, "|") + data = string.split(line, "|") - self.mNRepeats = string.atoi(string.strip(data[0])) - self.mScore = string.atof(string.strip(data[1])) - self.mLength = string.atoi(string.strip(data[2])) - self.mDiagonal = string.atoi(string.strip(data[3])) - self.mBW_from = string.atoi(string.strip(data[4])) - self.mBW_to = string.atoi(string.strip(data[5])) - self.mLevel = string.atoi(string.strip(data[6])) + self.nrepeats = string.atoi(string.strip(data[0])) + self.score = string.atof(string.strip(data[1])) + self.length = string.atoi(string.strip(data[2])) + self.diagonal = string.atoi(string.strip(data[3])) + self.bw_from = string.atoi(string.strip(data[4])) + self.bw_to = string.atoi(string.strip(data[5])) + self.level = string.atoi(string.strip(data[6])) - ## read single repeats + # read single repeats file.readline() - self.mRepeatUnits = [] + self.repeat_units = [] while 1: line = file.readline() - if not line: break - - result = rx_repeat.search( line ) + if not line: + break - if not result: break - - self.mRepeatUnits.append( result.groups()) - - def GetBWFrom(self): - return self.mBW_from - def GetBWTo(self): - return self.mBW_to - def GetNRepeats(self): - return self.mNRepeats - def GetDiagonal(self): - return self.mDiagonal - def GetLength(self): - return self.mLength - def GetScore(self): - return self.mScore - def GetLevel(self): - return self.mLevel - def GetRepeatUnits( self ): - return self.mRepeatUnits - - def Print( self ): - print string.join ( - map( str, (self.mNRepeats, self.mScore, self.mLength, self.mLevel)) , - "\t" ) - print self.mRepeatUnits - - ##------------------------------------------------------------------------ - def BuildResult( self, file ): + result = rx_repeat.search(line) + + if not result: + break + + self.repeat_units.append(result.groups()) + + def __str__(self): + return "\t".join( + map(str, (self.nrepeats, self.score, self.length, self.level)) +\ + "\n".join(self.repeat_units)) + + def build_result(self, file): """parse the output file and return a tuple of repeats. """ repeats = [] - + while 1: line = file.readline() - if not line: break - + if not line: + break + if line[:-1] == "repeatfinder finished without problems": break elif line[:-1] == "Not enough dots given": @@ -106,139 +103,156 @@ def BuildResult( self, file ): break else: try: - repeats.append( RadarRepeat( file )) + repeats.append(RadarRepeat(file)) except ValueError: break return tuple(repeats) -##------------------------------------------------------------ -def multiple_fasta_iterator (filenames, regex_identifier = None ): + +def multiple_fasta_iterator(filenames, regex_identifier=None): """iterate over multiple fasta files.""" - + identifier = None - + for filename in filenames: if filename[-3:] == ".gz": - infile = gzip.open( filename, "r" ) + infile = gzip.open(filename, "r") elif filename == "-": infile = sys.stdin else: - infile = open( filename, "r") + infile = open(filename, "r") for line in infile: - if line[0] == "#": continue + if line[0] == "#": + continue - if line[0] == ">" : + if line[0] == ">": if identifier: yield identifier, "".join(fragments) - + if regex_identifier: try: - identifier = re.search(regex_identifier, line[1:-1]).groups()[0] + identifier = re.search( + regex_identifier, line[1:-1]).groups()[0] except AttributeError: - raise "could not parse identifier from line %s - check the input" % line[1:-1] + raise "could not parse identifier from line %s - check the input" % line[ + 1:-1] else: identifier = re.split("\s", line[1:-1])[0] fragments = [] else: - fragments.append (re.sub( "\s", "", line.strip() ) ) + fragments.append(re.sub("\s", "", line.strip())) - if filename != "-": infile.close() + if filename != "-": + infile.close() if identifier: yield identifier, "".join(fragments) raise StopIteration -def runLFasta( filename_sequence, filename_output, lfasta_options ): - - retcode = subprocess.call( "lfasta %s %s %s > %s " % \ - (filename_sequence, filename_sequence, lfasta_options, filename_output), - stderr=subprocess.PIPE, - shell=True) + +def run_lfasta(filename_sequence, filename_output, lfasta_options): + + retcode = subprocess.call("lfasta %s %s %s > %s " % + (filename_sequence, filename_sequence, + lfasta_options, filename_output), + stderr=subprocess.PIPE, + shell=True) if retcode != 0: - raise IOError( "error while running lfasta: code = %i" % retcode ) + raise IOError("error while running lfasta: code = %i" % retcode) + def main(): - parser = optparse.OptionParser( version = "%prog version: $Id$", usage = USAGE ) + parser = optparse.OptionParser(version="%prog version: $Id$", usage=USAGE) - parser.add_option( "-a", "--filename-fasta", dest="filename_fasta", type="string", action="append", - help="filename with fasta sequence [default=%default]") + parser.add_option("-a", "--filename-fasta", dest="filename_fasta", type="string", action="append", + help="filename with fasta sequence [default=%default]") parser.add_option("-v", "--verbose", dest="loglevel", type="int", - help="loglevel [%default]. The higher, the more output." ) + help="loglevel [%default]. The higher, the more output.") parser.add_option("-L", "--log", dest="stdlog", type="string", help="file with logging information [default = stdout].", - metavar = "FILE" ) - + metavar="FILE") + parser.add_option("-E", "--error", dest="stderr", type="string", help="file with error information [default = stderr].", - metavar = "FILE" ) - + metavar="FILE") + parser.add_option("-S", "--stdout", dest="stdout", type="string", help="file where output is to go [default = stdout].", - metavar = "FILE" ) - + metavar="FILE") + + parser.add_option("--keep-temp", dest="keep_temp", action="store_true", + help="keep temporary directory [%default].", + metavar="FILE") + + parser.add_option("--random-seed", dest="random_seed", type="int", + help="set random seed to value. Set to 0 to use random seed [%default].") + parser.set_defaults( - stderr = sys.stderr, - stdout = sys.stdout, - stdlog = sys.stdout, - filename_fasta = [], - loglevel = 0, - tmpdir = "/tmp", - regex_identifier = "^(\S+)" ) + stderr=sys.stderr, + stdout=sys.stdout, + stdlog=sys.stdout, + filename_fasta=[], + loglevel=0, + tmpdir="/tmp", + keep_temp=False, + random_seed=0, + regex_identifier="^(\S+)") (options, args) = parser.parse_args() - if options.stdout != sys.stdout: + logging.basicConfig(level=logging.INFO) + + if options.stdout != sys.stdout: options.stdout = open(options.stdout, "w") if options.stderr != sys.stderr: options.stderr = open(options.stderr, "w") if options.stdlog != sys.stdout: - options.stdlog = open(options.stdlog, "a") - + options.stdlog = open(options.stdlog, "a") + options.filename_fasta += args - + if len(options.filename_fasta) == 0: options.filename_fasta.append("-") - - pyradar.setLogLevel( options.loglevel ) - - iterator = multiple_fasta_iterator( options.filename_fasta, - regex_identifier = options.regex_identifier ) - - for identifier, sequence in iterator: - - options.stdout.write(">%s\n" % identifier ) - tmpdir = tempfile.mkdtemp( prefix = options.tmpdir + "/" ) - filename_sequence = tmpdir + "/seq" - filename_lfasta1 = tmpdir + "/lfasta1" - filename_lfasta2 = tmpdir + "/lfasta2" - outfile = open(filename_sequence, "w") - outfile.write(sequence) - outfile.close() - - runLFasta( filename_sequence, filename_lfasta1, "") - runLFasta( filename_sequence, filename_lfasta2, "-f -12 -g -2 -s 250" ) - - pyradar.run_from_files( filename_sequence, - filename_sequence, - filename_lfasta1, - filename_lfasta2 ) - - shutil.rmtree( tmpdir ) - -if __name__ == '__main__': - sys.exit(main()) - - - + pyradar.setLogLevel(options.loglevel) + iterator = multiple_fasta_iterator( + options.filename_fasta, + regex_identifier=options.regex_identifier) + for identifier, sequence in iterator: + + options.stdout.write(">%s\n" % identifier) + tmpdir = tempfile.mkdtemp(prefix=options.tmpdir + "/") + filename_sequence = os.path.join(tmpdir, "seq") + filename_lfasta1 = os.path.join(tmpdir, "lfasta1") + filename_lfasta2 = os.path.join(tmpdir, "lfasta2") + with open(filename_sequence, "w") as outf: + outf.write(sequence) + + run_lfasta(filename_sequence, filename_lfasta1, "") + run_lfasta(filename_sequence, filename_lfasta2, "-f -12 -g -2 -s 250") + + pyradar.run_from_files(filename_sequence, + filename_sequence, + filename_lfasta1, + filename_lfasta2, + random_seed=options.random_seed) + + if not options.keep_temp: + shutil.rmtree(tmpdir) + else: + logging.info("temporary directory {} will not be deleted".format( + tmpdir)) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/setup.py b/setup.py index b99c456..a6e2856 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,30 @@ #!/usr/bin/env python -import os, glob, sys, subprocess -import ez_setup -ez_setup.use_setuptools() +import glob +import sys +import subprocess +import setuptools from distutils.core import setup from distutils.extension import Extension -## check if cython exists +# check if cython exists try: from Cython.Distutils import build_ext except: print """Cython not installed. Please install cython from http://cython.org. """ - sys.exit (1) + sys.exit(1) -## check if lfasta exitst -retcode = subprocess.call( "which lfasta", - shell=True) +# check if lfasta exitst +retcode = subprocess.call("which lfasta", + shell=True) if retcode != 0: print """WARNING: could not find lfasta in your path. -Please download the source code of W.R. Pearson's fasta package and build -lfasta using the following commands: +Please download the source code of W.R. Pearson's fasta package and +build lfasta using the following commands: mkdir fasta2 cd fasta2 @@ -35,27 +36,26 @@ Copy the lfasta executable somewhere into your path. """ -c_sources = [ x for x in glob.glob( "src/*.c" ) if x not in "src/main.c" ] - +c_sources = [x for x in glob.glob("src/*.c") if x not in "src/main.c"] + # build the interface pyradar = Extension( "pyradar", # name of extension - [ "pyradar/pyradar.pyx", ] + c_sources, - library_dirs=[], - libraries=[], - language="c", - ) + ["pyradar/pyradar.pyx", ] + c_sources, + library_dirs=[], + libraries=[], + language="c", +) setup(name='Radar', - version='1.1.3', + version='1.2', description='RADAR - Rapid Automatic Detection and Alignment of Repeats', - author='Andreas Heger', - author_email='andreas.heger@helsinki.fi', - url='http://wwwfgu.anat.ox.ac.uk/~andreas', - package_dir = {}, - packages = [], - scripts=['scripts/radar.py',], - ext_modules=[ pyradar ], - cmdclass = {'build_ext': build_ext} - ) - \ No newline at end of file + author='Andreas Heger & Liisa Holm', + author_email='andreas.heger@gmail.com', + url='https://github.com/AndreasHeger/radar', + package_dir={}, + packages=[], + scripts=['scripts/radar.py', ], + ext_modules=[pyradar], + cmdclass={'build_ext': build_ext} + ) diff --git a/src/radar.c b/src/radar.c index 3424b9c..31a9a1e 100644 --- a/src/radar.c +++ b/src/radar.c @@ -2148,10 +2148,11 @@ extern char globalresult[MAXRESULTSIZE]; } int radar_run_from_files( - const char * filename_sequence, - const char * filename_ma, - const char * filename_lfasta, - const char * filename_lfasta2) + const char * filename_sequence, + const char * filename_ma, + const char * filename_lfasta, + const char * filename_lfasta2, + unsigned int random_seed) { int lprofile; PROFILECOLUMN *profile; @@ -2167,7 +2168,12 @@ int radar_run_from_files( WellcomeMsg(); globalresult[0] = '\0'; - /* read sequence and allocate memory for the masked sequence -------------------------------------------------------*/ + if (random_seed != 0) + srand(random_seed); + else + srand((unsigned int)clock()); + + /* read sequence and allocate memory for the masked sequence -----*/ if (verbose > LL1) printf("Reading sequence..."); lsequence = ReadSequence( filename_sequence, &sequence ); if (verbose > LL1) printf("Done (length=%i)\n", lsequence); diff --git a/src/radar.h b/src/radar.h index a0de6aa..bcb3e6f 100644 --- a/src/radar.h +++ b/src/radar.h @@ -110,7 +110,7 @@ int CountAndScoreRepeats(); void CalculateZScore(); -int radar_run_from_files( const char *, const char *, const char *, const char *); -void radar_setLogLevel( int ); +int radar_run_from_files(const char *, const char *, const char *, const char *, unsigned int); +void radar_setLogLevel(int); -#endif \ No newline at end of file +#endif diff --git a/src/repeatstatistics.c b/src/repeatstatistics.c index 5052939..abc0cf5 100644 --- a/src/repeatstatistics.c +++ b/src/repeatstatistics.c @@ -119,7 +119,6 @@ double *mean, *standard_deviation; CALCTYPE *scores = Malloc( iterations * sizeof(CALCTYPE)); int i; - srand((unsigned int)clock()); /* get scores for iterations alignments of shuffled sequences against profile---------------------------------------------------*/ for (i = 0; i < iterations; i++) { ShuffleSequence( rsequence, lsequence );