From 5153f53d6764e13e23edb0721bb81300bfba68ca Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Tue, 27 Aug 2019 20:34:09 +0200 Subject: [PATCH 1/7] Fix #38; update EMBLmyGFF3 to 1.2.7 --- EMBLmyGFF3/EMBLmyGFF3.py | 6 +++--- setup.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/EMBLmyGFF3/EMBLmyGFF3.py b/EMBLmyGFF3/EMBLmyGFF3.py index 8bb0dc1..2eafe06 100755 --- a/EMBLmyGFF3/EMBLmyGFF3.py +++ b/EMBLmyGFF3/EMBLmyGFF3.py @@ -188,12 +188,12 @@ def _add_mandatory(self): try: start = seq.index('n') while start: - logging.debug("There is gap starting at position %s", start) + logging.debug("There is gap in %s starting at position %s" % (self.record.name,start)) # Now find the end end = start + 1 - while end: + while end < len(seq): if seq[end] == 'n' : - end +=1 + end +=1 else: break diff --git a/setup.py b/setup.py index 29f93cc..1b1100b 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ setup( name='EMBLmyGFF3', - version='1.2.6', + version='1.2.7', description='An efficient way to convert gff3 annotation files into EMBL format ready to submit', url='https://github.com/NBISweden/EMBLmyGFF3', - download_url='https://github.com/NBISweden/EMBLmyGFF3/archive/v1.2.6.tar.gz', + download_url='https://github.com/NBISweden/EMBLmyGFF3/archive/v1.2.7.tar.gz', author='Martin Norling, Niclas Jareborg, Jacques Dainat', license='GPL-3.0', From 40d17b8d078fc4a59b701d1a22a97ccbd2cdc657 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 30 Aug 2019 13:44:03 +0200 Subject: [PATCH 2/7] fix #40 --- EMBLmyGFF3/EMBLmyGFF3.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/EMBLmyGFF3/EMBLmyGFF3.py b/EMBLmyGFF3/EMBLmyGFF3.py index 2eafe06..24fab20 100755 --- a/EMBLmyGFF3/EMBLmyGFF3.py +++ b/EMBLmyGFF3/EMBLmyGFF3.py @@ -1383,6 +1383,7 @@ def main(): writer.write_all( outfile ) writer = None - EMBL.print_progress(True) + if args.progress: + EMBL.print_progress(True) sys.stderr.write( """Conversion done\n""") From 
2bdc806f65dc57acad15cc423f07c398607153ff Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Sun, 6 Oct 2019 10:31:40 +0200 Subject: [PATCH 3/7] add warning when seqid from gff absent from fasta. Fix #41 --- EMBLmyGFF3/EMBLmyGFF3.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/EMBLmyGFF3/EMBLmyGFF3.py b/EMBLmyGFF3/EMBLmyGFF3.py index 24fab20..0a153c4 100755 --- a/EMBLmyGFF3/EMBLmyGFF3.py +++ b/EMBLmyGFF3/EMBLmyGFF3.py @@ -1340,6 +1340,14 @@ def main(): infile.seek(0, 0) for record in GFF.parse(infile, base_dict=seq_dict): + + # Check existence of gff seqid among the fasta sequence identifiers + if record.id not in seq_dict: + logging.warning("Sequence id <%s> from the gff file not found within the fasta file. Are you sure to provide the correct" \ + " fasta file? The tool will create a string of ???? as sequence (its length will be the end position of the last feature). " \ + "For you information, if you use the --translate option the tool will raise an error due to ??? codons that do not exist." % (record.id)) + + # Check sequence size and skip if < 100 bp if len(record.seq)<100: logging.warning("Sequence %s too short (%s bp)! Minimum accpeted by ENA is 100, we skip it !" 
% (record.name, len(record.seq) ) ) continue From c21bcd68ada8ce8c5d60649b38ad029f3a6010e8 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 7 Oct 2019 20:44:14 +0200 Subject: [PATCH 4/7] split long lines --- EMBLmyGFF3/EMBLmyGFF3.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/EMBLmyGFF3/EMBLmyGFF3.py b/EMBLmyGFF3/EMBLmyGFF3.py index 0a153c4..1407d62 100755 --- a/EMBLmyGFF3/EMBLmyGFF3.py +++ b/EMBLmyGFF3/EMBLmyGFF3.py @@ -693,7 +693,8 @@ def FT(self): break if not locus_tag: #inform the user that we will use the locus_tag instead msg_type = "I'm suppose to use the value of the attribute %s from the gff3 file as locus_tag but this attribute doesnt exist" % (attribute) - msg = "I'm suppose to use the value of the attribute %s from the gff3 file as locus_tag but this attribute doesnt exist for feature %s. Consequently I will use the locus_tag %s to create a proper one." % (attribute, feature.id, self.locus_tag) + msg = "I'm suppose to use the value of the attribute %s from the gff3 file as locus_tag but this attribute doesnt exist for feature %s. "\ + "Consequently I will use the locus_tag %s to create a proper one." % (attribute, feature.id, self.locus_tag) self.handle_message("warning", msg_type, msg, None) # create a locus tag base on the prefix + LOCUS + incremented number if not locus_tag: @@ -854,7 +855,10 @@ def set_classification(self, classification = None, strain = None, environmental if not strain and not environmental_sample and not isolate: #no information provided, let's ask the user onekey = None while not onekey: - sys.stderr.write("At least one of the following qualifiers \"strain, environmental_sample, isolate\" must exist when organism belongs to Bacteria. Please fill one of those information.(source feature keys containing the /environmental_sample qualifier should also contain the /isolation_source qualifier. 
entries including /environmental_sample must not include the /strain qualifier)\nStrain:") + sys.stderr.write("At least one of the following qualifiers \"strain, environmental_sample, isolate\" must exist " \ + "when organism belongs to Bacteria. Please fill one of those information.(source feature keys containing "\ + "the /environmental_sample qualifier should also contain the /isolation_source qualifier. entries including "\ + "/environmental_sample must not include the /strain qualifier)\nStrain:") strain = raw_input() if strain: EMBL.PREVIOUS_VALUES["strain"]=strain From cd60779f47ab1bf6ad0f108ad368ed65b51552b0 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 7 Oct 2019 20:49:16 +0200 Subject: [PATCH 5/7] location + strand by defaut (part issue #42) + code simplification + logging update --- EMBLmyGFF3/modules/location.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/EMBLmyGFF3/modules/location.py b/EMBLmyGFF3/modules/location.py index a3f63da..6e3dce0 100755 --- a/EMBLmyGFF3/modules/location.py +++ b/EMBLmyGFF3/modules/location.py @@ -19,23 +19,36 @@ def __repr__(self): output = "" suffix = "" - complement = True - if len(self.location.parts) == len([l for l in self.location.parts if l.strand < 0]): + + # Store all strand from the location parts to check potential inconsistency + strand=[] + for l in self.location.parts: + if l.strand != None: + if l.strand not in strand: + strand.append(l.strand) + + if len(strand) == 0: + logging.debug("No strand stored among the location_parts %s" % self.location.parts) + elif len(strand) > 1: + logging.error("Different strand stored in location_parts (+ strand will be used as default): %s" % self.location.parts) + elif strand == [1]: + logging.debug("+ strand") + else: + logging.debug("- strand") output += "complement(" suffix += ")" - complement = False + + # If more than one part let's join the differnt parts together if (len(self.location.parts) > 1): 
output += "join(" suffix += ")" - output += ",".join(self._format_parts(self.location.parts, complement=complement)) + + output += ",".join(self._format_parts(self.location.parts)) return output + suffix - def _format_parts(self, parts, complement = True): + def _format_parts(self, parts): output = [] for part in parts: - if part.strand > 0 or complement == False: - output += ["%s..%s" % (type(part.start)(part.start+1), type(part.end)(part.end+0))] - else: - output += ["complement(%s..%s)" % (type(part.start)(part.start+1), type(part.end)(part.end+0))] - return output + output += ["%s..%s" % (type(part.start)(part.start+1), type(part.end)(part.end+0))] + return output \ No newline at end of file From 5919107ac0b961e0b67bf4ba629d2db72e30e69d Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 7 Oct 2019 21:17:55 +0200 Subject: [PATCH 6/7] raise an error and stop when we meet a CDS without strand. Fix #42 --- EMBLmyGFF3/modules/feature.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/EMBLmyGFF3/modules/feature.py b/EMBLmyGFF3/modules/feature.py index f4f9f2e..7e5eab4 100755 --- a/EMBLmyGFF3/modules/feature.py +++ b/EMBLmyGFF3/modules/feature.py @@ -251,6 +251,16 @@ def _infer_ORFs(self, feature): # basic info strand = self.location.strand + # raise an error if no strand for the CDS. Strand is not mandatory (can be a dot) except for CDS where it has an + # impact on the translation, and to check where is start and stop codon... + if strand == None: + ID='' + for qualifier in self.feature.qualifiers: + if 'id' == qualifier.lower(): + ID = "%s" % " ".join(self.feature.qualifiers[qualifier]) + break + logging.error('CDS %s does not have any strand! Please check your gff file.' 
% ID) + sys.exit() if start_codon.upper() not in codon_table.start_codons: self.location = self._set_before(self.location) From 865dab25e9804ff98b4d22be1a3a2e29ee86b97e Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Mon, 7 Oct 2019 21:21:59 +0200 Subject: [PATCH 7/7] update to version 1.2.8 --- EMBLmyGFF3/EMBLmyGFF3.py | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/EMBLmyGFF3/EMBLmyGFF3.py b/EMBLmyGFF3/EMBLmyGFF3.py index 1407d62..18e5870 100755 --- a/EMBLmyGFF3/EMBLmyGFF3.py +++ b/EMBLmyGFF3/EMBLmyGFF3.py @@ -9,7 +9,7 @@ shameless_plug=""" ############################################################################# - # NBIS 2018 - Sweden # + # NBIS 2019 - Sweden # # Authors: Martin Norling, Niclas Jareborg, Jacques Dainat # # Please visit https://github.com/NBISweden/EMBLmyGFF3 for more information # ############################################################################# diff --git a/setup.py b/setup.py index 1b1100b..217798b 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ setup( name='EMBLmyGFF3', - version='1.2.7', + version='1.2.8', description='An efficient way to convert gff3 annotation files into EMBL format ready to submit', url='https://github.com/NBISweden/EMBLmyGFF3', - download_url='https://github.com/NBISweden/EMBLmyGFF3/archive/v1.2.7.tar.gz', + download_url='https://github.com/NBISweden/EMBLmyGFF3/archive/v1.2.8.tar.gz', author='Martin Norling, Niclas Jareborg, Jacques Dainat', license='GPL-3.0',