Skip to content

Commit

Permalink
Merge pull request #49 from NBISweden/py3
Browse files Browse the repository at this point in the history
Converted to python3
  • Loading branch information
Juke34 authored Feb 20, 2020
2 parents 3bba80e + 2da5794 commit 778e706
Show file tree
Hide file tree
Showing 20 changed files with 14,459 additions and 14,444 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
language: python
python:
- "2.7"
- 3.5
- 3.7
script:
- t/test.sh
78 changes: 42 additions & 36 deletions EMBLmyGFF3/EMBLmyGFF3.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python3

from .modules.utilities import *
from .modules.feature import Feature
from .modules.help import Help
from EMBLmyGFF3.version import __version__


from Bio import SeqIO, Entrez
from BCBio import GFF
from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition

import os
import sys
import gzip
import pprint
import time
import shutil
import logging
import argparse
import re

"""
EMBL writer for ENA data submission. Note that this implementation is basically
just the documentation at ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt
Expand All @@ -9,11 +30,12 @@

shameless_plug="""
#############################################################################
# NBIS 2019 - Sweden #
# EMBLmyGFF3 v{str1} #
# NBIS - National Bioinformatics Infrastructure Sweden #
# Authors: Martin Norling, Niclas Jareborg, Jacques Dainat #
# Please visit https://github.com/NBISweden/EMBLmyGFF3 for more information #
#############################################################################
\n"""
\n""".format(str1=__version__)

TODO="""
TODO: find list of previous ENA release dates and numbers
Expand All @@ -22,22 +44,6 @@
TODO: add way to handle mandatory features and feature qualifiers (especially contingent dependencies)
"""

import os
import sys
import gzip
import pprint
import time
import shutil
import logging
import argparse
import re
from modules.utilities import *
from Bio import SeqIO, Entrez
from BCBio import GFF
from Bio.SeqFeature import SeqFeature, FeatureLocation, ExactPosition
from modules.feature import Feature
from modules.help import Help

SCRIPT_DIR=os.path.dirname(__file__)
FEATURE_DIR=SCRIPT_DIR + "/modules/features"
QUALIFIER_DIR=SCRIPT_DIR + "/modules/qualifiers"
Expand Down Expand Up @@ -262,7 +268,7 @@ def _verify(self, key, key_type):
sys.stderr.write("\n'%s' is not a legal value for %s.\n" % (key, key_type))
sys.stderr.write("Legal values are:\n")
if type(self.legal_values[key_type]) == type({}):
for value, description in self.legal_values[key_type].iteritems():
for value, description in self.legal_values[key_type].items():
sys.stderr.write(" - %s\t%s\n" % (value, description))
else:
for value in self.legal_values[key_type]:
Expand All @@ -271,7 +277,7 @@ def _verify(self, key, key_type):
sys.stderr.write("Please enter a value: ")
else:
sys.stderr.write("Please enter new value: ")
key = raw_input()
key = input()
if key.isdigit(): key = int(key)

return key
Expand All @@ -289,7 +295,7 @@ def _verify_locus_tag(self, locus_tag):

if not locus_tag:
sys.stderr.write("No value provided as locus_tag.\nPlease provide a locus_tag (A default XXX locus_tag will be set up if none provided):")
locus_tag = raw_input()
locus_tag = input()
if not locus_tag:
checked_locus_tag="XXX"
break
Expand All @@ -314,7 +320,7 @@ def _verify_locus_tag(self, locus_tag):

if not checked_locus_tag:
sys.stderr.write("Please provide a locus_tag (A default XXX locus_tag will be set up if none provided):")
locus_tag = raw_input()
locus_tag = input()
if not locus_tag:
checked_locus_tag="XXX"
break
Expand Down Expand Up @@ -393,7 +399,7 @@ def print_progress(clear = False):

def handle_message(self, type, msg_type, msg, value):

if EMBL.PREVIOUS_ERRORS.has_key(msg_type):
if msg_type in EMBL.PREVIOUS_ERRORS:
EMBL.PREVIOUS_ERRORS[msg_type] += 1

level = eval("logging.%s" % type.upper())
Expand All @@ -403,7 +409,7 @@ def handle_message(self, type, msg_type, msg, value):
else:
if not value: # number of lines accepted to display (default or given to the method)
value = 5
if not EMBL.PREVIOUS_ERRORS.has_key(msg_type) or EMBL.PREVIOUS_ERRORS[msg_type] < value:
if msg_type not in EMBL.PREVIOUS_ERRORS or EMBL.PREVIOUS_ERRORS[msg_type] < value:
logging.log(level, msg)
EMBL.PREVIOUS_ERRORS.setdefault(msg_type,1)
elif EMBL.PREVIOUS_ERRORS[msg_type] == value:
Expand Down Expand Up @@ -860,27 +866,27 @@ def set_classification(self, classification = None, strain = None, environmental
"when organism belongs to Bacteria. Please fill one of those information.(source feature keys containing "\
"the /environmental_sample qualifier should also contain the /isolation_source qualifier. entries including "\
"/environmental_sample must not include the /strain qualifier)\nStrain:")
strain = raw_input()
strain = input()
if strain:
EMBL.PREVIOUS_VALUES["strain"]=strain
onekey = strain
if not strain: #Entry with /environmental_sample must not include the /strain qualifier
environmental_sample = None
while environmental_sample != "n" and environmental_sample != "y" :
sys.stderr.write("Environmental_sample [y/n]:")
environmental_sample = raw_input()
environmental_sample = input()
if environmental_sample == "y":
EMBL.PREVIOUS_VALUES["environmental_sample"]=None
onekey = environmental_sample
isolation_source=None
sys.stderr.write("/environmental_sample qualifier should also contain the /isolation_source qualifier.")
while not isolation_source: #/environmental_sample qualifier should also contain the /isolation_source qualifier
sys.stderr.write("isolation_source:")
isolation_source = raw_input()
isolation_source = input()
EMBL.PREVIOUS_VALUES["isolation_source"]=isolation_source

sys.stderr.write("isolate:")
isolate = raw_input()
isolate = input()
if isolate:
EMBL.PREVIOUS_VALUES["isolate"]=isolate
onekey = isolate
Expand All @@ -900,7 +906,7 @@ def set_classification(self, classification = None, strain = None, environmental
sys.stderr.write("/environmental_sample qualifier should also contain the /isolation_source qualifier.\n")
while not isolation_source: #/environmental_sample qualifier should also contain the /isolation_source qualifier
sys.stderr.write("isolation_source:")
isolation_source = raw_input()
isolation_source = input()
EMBL.PREVIOUS_VALUES["isolation_source"]=isolation_source


Expand Down Expand Up @@ -1067,7 +1073,7 @@ def set_project_id(self, project_id = None):
else:
if not project_id:
sys.stderr.write("No project_id provided.\nPlease provide a project ID:")
project_id = raw_input()
project_id = input()
if not project_id:
project_id = "XXX"

Expand Down Expand Up @@ -1096,7 +1102,7 @@ def set_species(self, species = None):
else:
while not species:
sys.stderr.write("No value provided for species.\nPlease provide the scientific name or taxid of the organism:")
species = raw_input()
species = input()

species = self.get_species_from_taxid(species)
self.species = species
Expand Down Expand Up @@ -1313,17 +1319,17 @@ def main():
if args.gzip:
if not outfile.endswith(".embl.gz"):
outfile += ".gz" if outfile.endswith(".embl") else ".embl.gz"
outfile = gzip.open(outfile, "wb")
outfile = gzip.open(outfile, "wt")
else:
if not outfile.endswith(".embl"):
outfile += ".embl"
outfile = open(outfile, "wb")
outfile = open(outfile, "w")
else:
outfile = sys.stdout

logging.info("Reading sequence file")
infile = gzip.open(args.gff_file) if args.gff_file.endswith(".gz") else open(args.gff_file)
infasta = gzip.open(args.fasta) if args.fasta.endswith(".gz") else open(args.fasta)
infile = gzip.open(args.gff_file, 'rt') if args.gff_file.endswith(".gz") else open(args.gff_file)
infasta = gzip.open(args.fasta, 'rt') if args.fasta.endswith(".gz") else open(args.fasta)
seq_dict = SeqIO.to_dict( SeqIO.parse(infasta, "fasta") )
logging.info("Finished reading sequence file.")

Expand Down
4 changes: 2 additions & 2 deletions EMBLmyGFF3/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python3

from EMBLmyGFF3 import *
from .EMBLmyGFF3 import *
4 changes: 2 additions & 2 deletions EMBLmyGFF3/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python3

from EMBLmyGFF3 import main
from .EMBLmyGFF3 import main
main()
56 changes: 30 additions & 26 deletions EMBLmyGFF3/modules/feature.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import division
from .qualifier import *
from .location import EMBLLocation
from .utilities import *

from Bio.Seq import Seq
from Bio.Data import CodonTable
from Bio.Alphabet.IUPAC import *
from Bio.SeqFeature import SeqFeature, FeatureLocation, BeforePosition, AfterPosition

import os
import sys
import json
import logging
from utilities import *
from operator import attrgetter
from Bio.Seq import Seq
from Bio.Data import CodonTable
from Bio.Alphabet.IUPAC import *
from location import EMBLLocation
from Bio.SeqFeature import SeqFeature, FeatureLocation, BeforePosition, AfterPosition
from qualifier import *

def chunk_format(string, chunk_string = None, offset = 0, chunk_size = 3, chunks_per_line = 30, indent = 6):
offset = offset%chunk_size
Expand Down Expand Up @@ -142,7 +142,7 @@ def __repr__(self):
Formats the feature as EMBL, limited to 80 character lines,
including sub features.
"""
output=unicode("")
output=str("")

if self.skip_feature is False or self.force_unknown_features or self.force_uncomplete_features:
output = self._feature_as_EMBL(self.no_wrap_qualifier) if self.type not in self.remove else ""
Expand Down Expand Up @@ -202,9 +202,13 @@ def _feature_as_EMBL(self, no_wrap_qualifier):
output = multiline("FT", string, featureType=self.type, wrap=59, split_char=",")

# Print qualifiers for the feature
for qualifier in self.qualifiers.values():
if qualifier.value:
output += qualifier.embl_format(no_wrap_qualifier)
for qualifier in sorted(self.qualifiers): # sort by qualifier name

# continue if qualifier has a value
if self.qualifiers[qualifier].value:
# sort by value
self.qualifiers[qualifier].value = sorted(self.qualifiers[qualifier].value)
output += self.qualifiers[qualifier].embl_format(no_wrap_qualifier)

return output

Expand Down Expand Up @@ -273,7 +277,7 @@ def _infer_ORFs(self, feature):
sub_feature._infer_ORFs(feature)

def _check_qualifier(self, feature):
for qualifier, value in self.qualifiers.iteritems():
for qualifier, value in self.qualifiers.items():

# Check presence of mandatory qualifier
if self.qualifiers[qualifier].mandatory:# Check if the qualifier is mandatory
Expand All @@ -288,7 +292,7 @@ def _load_data(self, feature, accessions):
"""
Parses a GFF feature and stores the data in the current Feature
"""
for qualifier, value in feature.qualifiers.iteritems():
for qualifier, value in feature.qualifiers.items():
logging.debug("Reading qualifier: %s (%s), translating to %s" % (qualifier, value, self._from_gff_qualifier(qualifier)))
self.add_qualifier( qualifier, value )

Expand All @@ -302,10 +306,10 @@ def _load_definition(self, filename):
try:
with open(filename) as data:
raw = json.load( data )
for key, value in raw.iteritems():
for key, value in raw.items():
#logging.error("key:%s value:%s",key,value)
if "qualifier" in key:
for item, definition in value.iteritems():
for item, definition in value.items():
#logging.error("item:%s definition:%s",item,definition)
self.legal_qualifiers += [item]
mandatory = "mandatory" in key
Expand Down Expand Up @@ -335,7 +339,7 @@ def _load_feature_translations(self, filenames):
data = json.load( open("%s/%s" % (local_dir, filename)) )
except IOError:
data = json.load( open("%s/%s" % (module_dir, filename)) )
for gff_feature, info in data.iteritems():
for gff_feature, info in data.items():
if info.get("remove", False):
self.remove += [gff_feature]
if "target" in info:
Expand All @@ -354,7 +358,7 @@ def _load_qualifier_translations(self, filenames):
data = json.load( open("%s/%s" % (local_dir, filename)) )
except IOError:
data = json.load( open("%s/%s" % (module_dir, filename)) )
for gff_feature, info in data.iteritems():
for gff_feature, info in data.items():
if "target" in info:
self.qualifier_translation_list[gff_feature] = info["target"]
if "prefix" in info:
Expand Down Expand Up @@ -503,7 +507,7 @@ def combine(self, other):
self.location += other.location

# combine qualifier except codon start
for gff_qualifier, list_val_other in other.qualifiers.iteritems():
for gff_qualifier, list_val_other in other.qualifiers.items():
other_qualifier = self._from_gff_qualifier(gff_qualifier) # get the real qualifier name in EMBL format to be able to compare with the one already saved
if other_qualifier != "codon_start":
self.add_qualifier(gff_qualifier, list_val_other)
Expand Down Expand Up @@ -619,7 +623,7 @@ def translation(self):

def handle_message(self, type, msg_type, msg, value):

if Feature.PREVIOUS_ERRORS.has_key(msg_type):
if msg_type in Feature.PREVIOUS_ERRORS:
Feature.PREVIOUS_ERRORS[msg_type] += 1

level = eval("logging.%s" % type.upper())
Expand All @@ -629,7 +633,7 @@ def handle_message(self, type, msg_type, msg, value):
else:
if not value: # number of lines accepted to display (default or given to the method)
value = 5
if not Feature.PREVIOUS_ERRORS.has_key(msg_type) or Feature.PREVIOUS_ERRORS[msg_type] < value:
if msg_type not in Feature.PREVIOUS_ERRORS or Feature.PREVIOUS_ERRORS[msg_type] < value:
logging.log(level, msg)
Feature.PREVIOUS_ERRORS.setdefault(msg_type,1)
elif Feature.PREVIOUS_ERRORS[msg_type] == value:
Expand Down Expand Up @@ -660,11 +664,11 @@ def handle_message(self, type, msg_type, msg, value):
break

for gff_feature in record.features:
print gff_feature
print "_"*80
print(gff_feature)
print("_"*80)
feature = Feature( gff_feature, args.translation_file, 1, feature_definition_dir = "features", qualifier_definition_dir="qualifiers" )
print "_"*80
print feature
print("_"*80)
print(feature)
break
except Exception as e:
import traceback
Expand Down
10 changes: 5 additions & 5 deletions EMBLmyGFF3/modules/features/parse2json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python3
"""
Script to parse a "raw" text copy of http://www.insdc.org/files/feature_table.html#7.2 into
a set of json feature identifiers.
Expand Down Expand Up @@ -119,10 +119,10 @@ def parse_raw_to_json(infile):
extension = row.strip()
setattr(current, identifier, base + extension)
except Exception as e:
print "EXCEPTION: %s" % e
print "ID: '%s'" % identifier
print "ROW: '%s'" % row
print current
print("EXCEPTION: %s" % e)
print("ID: '%s'" % identifier)
print("ROW: '%s'" % row)
print(current)
import sys
sys.exit(0)
elif current != None:
Expand Down
Loading

0 comments on commit 778e706

Please sign in to comment.