From 9d372697173fd841da7e60b72f10174dd72a88e7 Mon Sep 17 00:00:00 2001 From: nicsilvester Date: Wed, 14 Jun 2017 14:49:54 +0100 Subject: [PATCH] Expanded support of which accession will download a WGS set Previously, only LLLLVV prefix would work (eg AAAK03). Have now added the following: LLLL - unversioned prefix, will get latest WGS set version. E.g. AAAK LLLLVV000000 - versioned master accession. E.g. AAAK03000000 LLLL00000000 - unversioned master accession. E.g. AAAK00000000 --- python/analysisGet.py | 4 ++-- python/assemblyGet.py | 4 ++-- python/enaDataGet.py | 4 ++-- python/enaGroupGet.py | 4 ++-- python/readGet.py | 4 ++-- python/sequenceGet.py | 53 +++++++++++++++++++++++++++++++----------- python/utils.py | 42 ++++++++++++++++++++++++++++----- python3/analysisGet.py | 4 ++-- python3/assemblyGet.py | 4 ++-- python3/enaDataGet.py | 4 ++-- python3/enaGroupGet.py | 4 ++-- python3/readGet.py | 4 ++-- python3/sequenceGet.py | 50 +++++++++++++++++++++++++++++---------- python3/utils.py | 42 ++++++++++++++++++++++++++++----- 14 files changed, 170 insertions(+), 57 deletions(-) diff --git a/python/analysisGet.py b/python/analysisGet.py index b94176c..e9963b6 100644 --- a/python/analysisGet.py +++ b/python/analysisGet.py @@ -16,7 +16,7 @@ def set_parser(): help='Destination directory (default is current running directory)') parser.add_argument('-m', '--meta', action='store_true', help='Download analysis XML in addition to data files (default is false)') - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser @@ -38,7 +38,7 @@ def set_parser(): try: readGet.download_files(accession, utils.SUBMITTED_FORMAT, dest_dir, False, fetch_meta) - print 'Download completed' + print 'Completed' except Exception: utils.print_error() sys.exit(1) diff --git a/python/assemblyGet.py b/python/assemblyGet.py index be013c0..6396eb7 100755 --- a/python/assemblyGet.py +++ b/python/assemblyGet.py @@ -26,7 +26,7 @@ def set_parser(): help='Destination directory (default is current running directory)') parser.add_argument('-w', '--wgs', action='store_true', help='Download WGS set if available (default is false)') - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def check_format(format): @@ -133,7 +133,7 @@ def download_assembly(dest_dir, accession, format, fetch_wgs, quiet=False): try: download_assembly(dest_dir, accession, format, fetch_wgs) - print 'Download completed' + print 'Completed' except Exception: utils.print_error() sys.exit(1) diff --git a/python/enaDataGet.py b/python/enaDataGet.py index a628f09..640daf8 100644 --- a/python/enaDataGet.py +++ b/python/enaDataGet.py @@ -31,7 +31,7 @@ def set_parser(): parser.add_argument('-i', '--index', action='store_true', help="""Download CRAM index files with submitted CRAM files, if any (default is false). This flag is ignored for fastq and sra format options. """) - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser @@ -73,7 +73,7 @@ def set_parser(): else: print 'Error: Invalid accession provided' sys.exit(1) - print 'Download completed' + print 'Completed' except Exception: utils.print_error() sys.exit(1) diff --git a/python/enaGroupGet.py b/python/enaGroupGet.py index 003fbea..dde927f 100644 --- a/python/enaGroupGet.py +++ b/python/enaGroupGet.py @@ -32,7 +32,7 @@ def set_parser(): parser.add_argument('-i', '--index', action='store_true', help="""Download CRAM index files with submitted CRAM files, if any (default is false). This flag is ignored for fastq and sra format options. """) - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def download_report(group, result, accession, temp_file): @@ -146,7 +146,7 @@ def download_group(accession, group, format, dest_dir, fetch_wgs, fetch_meta, fe try: download_group(accession, group, format, dest_dir, fetch_wgs, fetch_meta, fetch_index) - print 'Download completed' + print 'Completed' except Exception: utils.print_error() sys.exit(1) diff --git a/python/readGet.py b/python/readGet.py index e07bb9b..74560dd 100644 --- a/python/readGet.py +++ b/python/readGet.py @@ -21,7 +21,7 @@ def set_parser(): parser.add_argument('-i', '--index', action='store_true', help="""Download CRAM index files with submitted CRAM files, if any (default is false). This flag is ignored for fastq and sra format options""") - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def check_read_format(format): @@ -132,7 +132,7 @@ def download_files(accession, format, dest_dir, fetch_index, fetch_meta): try: download_files(accession, format, dest_dir, fetch_index, fetch_meta) - print 'Download completed' + print 'Completed' except Exception: utils.print_error() sys.exit(1) diff --git a/python/sequenceGet.py b/python/sequenceGet.py index 1af7e35..32b2d40 100755 --- a/python/sequenceGet.py +++ b/python/sequenceGet.py @@ -11,11 +11,11 @@ def set_parser(): parser = argparse.ArgumentParser(prog='sequenceGet', description='Download sequence data for a given INSDC accession') parser.add_argument('accession', help='INSDC sequence/coding accession or WGS prefix (LLLLVV) to fetch') - parser.add_argument('-f', '--format', default='embl', choices=['embl', 'fasta'], - help='File format required (default is embl)') + parser.add_argument('-f', '--format', default='embl', choices=['embl', 'fasta', 'master'], + help='File format required (default is embl); master format only available for WGS') parser.add_argument('-d', '--dest', default='.', help='Destination directory (default is current running directory)') - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def append_record(dest_file, accession, format): @@ -28,12 +28,20 @@ def download_sequence(dest_dir, accession, format): success = utils.download_record(dest_dir, accession, format) if not success: print 'Unable to fetch file for ' + accession + ', format ' + format + return success def download_wgs(dest_dir, accession, format): + if utils.is_unversioned_wgs_set(accession): + return download_unversioned_wgs(dest_dir, accession, format) + else: + return download_versioned_wgs(dest_dir, accession, format) + +def download_versioned_wgs(dest_dir, accession, format): + prefix = accession[:6] if format is None: format = utils.EMBL_FORMAT - public_set_url = utils.get_wgs_ftp_url(accession, utils.PUBLIC, format) - supp_set_url = utils.get_wgs_ftp_url(accession, utils.SUPPRESSED, format) + public_set_url = utils.get_wgs_ftp_url(prefix, utils.PUBLIC, format) + supp_set_url = utils.get_wgs_ftp_url(prefix, utils.SUPPRESSED, format) success = utils.get_ftp_file(public_set_url, dest_dir) if not success: success = utils.get_ftp_file(supp_set_url, dest_dir) @@ -41,31 +49,50 @@ def download_wgs(dest_dir, accession, format): print 'No WGS set file available for ' + accession + ', format ' + format print 'Please contact ENA (datasubs@ebi.ac.uk) if you feel this set should be available' +def download_unversioned_wgs(dest_dir, accession, format): + prefix = accession[:4] + if format is None: + format = utils.EMBL_FORMAT + public_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.PUBLIC, format) + if public_set_url is not None: + utils.get_ftp_file(public_set_url, dest_dir) + else: + supp_set_url = utils.get_nonversion_supp_wgs_ftp_url(prefix, format) + if supp_set_url is not None: + utils.get_ftp_file(supp_set_url, dest_dir) + else: + print 'No WGS set file available for ' + accession + ', format ' + format + print 'Please contact ENA (datasubs@ebi.ac.uk) if you feel this set should be available' + def check_format(format): - if format not in [utils.EMBL_FORMAT, utils.FASTA_FORMAT]: - print 'Please select a valid format for this accession: ', [utils.EMBL_FORMAT, utils.FASTA_FORMAT] + allowed_formats = [utils.EMBL_FORMAT, utils.FASTA_FORMAT, utils.MASTER_FORMAT] + if format not in allowed_formats: + print 'Please select a valid format for this accession: ', allowed_formats sys.exit(1) if __name__ == '__main__': parser = set_parser() args = parser.parse_args() - accession = args.accession + accession = args.accession.upper() format = args.format dest_dir = args.dest try: - if utils.is_sequence(accession) or utils.is_coding(accession): + if utils.is_wgs_set(accession): + download_wgs(dest_dir, accession, format) + elif utils.is_sequence(accession) or utils.is_coding(accession): if not utils.is_available(accession): print 'Record does not exist or is not available for accession provided' sys.exit(1) + if format == utils.MASTER_FORMAT: + print 'Invalid format. master format only available for WGS sets' + sys.exit(1) download_sequence(dest_dir, accession, format) - elif utils.is_wgs_set(accession): - download_wgs(dest_dir, accession, format) else: - print 'Error: Invalid accession. A sequence or coding accession or a WGS set prefix (LLLLVV) must be provided' + print 'Error: Invalid accession. A sequence or coding accession or a WGS set (prefix or master accession) must be provided' sys.exit(1) - print 'Download completed' + print 'Completed' except Exception: utils.print_error() sys.exit(1) diff --git a/python/utils.py b/python/utils.py index bf8238f..0c9e529 100644 --- a/python/utils.py +++ b/python/utils.py @@ -3,6 +3,7 @@ # import base64 +import ftplib import re import os import subprocess @@ -55,6 +56,7 @@ SEQUENCE_RELEASE_ID='sequence_release' WGS_FTP_BASE = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/wgs' +WGS_FTP_DIR = 'pub/databases/ena/wgs' PORTAL_SEARCH_BASE = 'http://www.ebi.ac.uk/ena/portal/api/search?' RUN_RESULT = 'result=read_run' @@ -78,6 +80,9 @@ sequence_pattern_3 = re.compile('^[A-Z]{4}[0-9]{8,9}(\.[0-9]+)?$') coding_pattern = re.compile('^[A-Z]{3}[0-9]{5}(\.[0-9]+)?$') wgs_prefix_pattern = re.compile('^[A-Z]{4}[0-9]{2}$') +wgs_master_pattern = re.compile('^[A-Z]{4}[0-9]{2}[0]{6}$') +unversion_wgs_prefix_pattern = re.compile('^[A-Z]{4}$') +unversion_wgs_master_pattern = re.compile('^[A-Z]{4}[0]{8}$') run_pattern = re.compile('^[EDS]RR[0-9]{6,7}$') experiment_pattern = re.compile('^[EDS]RX[0-9]{6,7}$') analysis_pattern = re.compile('^[EDS]RZ[0-9]{6,7}$') @@ -102,7 +107,14 @@ def is_coding(accession): return coding_pattern.match(accession) def is_wgs_set(accession): - return wgs_prefix_pattern.match(accession) + return wgs_prefix_pattern.match(accession) \ + or wgs_master_pattern.match(accession) \ + or unversion_wgs_prefix_pattern.match(accession) \ + or unversion_wgs_master_pattern.match(accession) + +def is_unversioned_wgs_set(accession): + return unversion_wgs_prefix_pattern.match(accession) \ + or unversion_wgs_master_pattern.match(accession) def is_run(accession): return run_pattern.match(accession) @@ -244,14 +256,32 @@ def get_ftp_file_with_md5_check(ftp_url, dest_dir, md5): except Exception: return False -def get_wgs_ftp_url(wgs_set, status, format): - base_url = WGS_FTP_BASE + '/' + status + '/' + wgs_set[:2].lower() + '/' + wgs_set +def get_wgs_file_ext(format): if format == EMBL_FORMAT: - return base_url + WGS_EMBL_EXT + return WGS_EMBL_EXT elif format == FASTA_FORMAT: - return base_url + WGS_FASTA_EXT + return WGS_FASTA_EXT elif format == MASTER_FORMAT: - return base_url + WGS_MASTER_EXT + return WGS_MASTER_EXT + +def get_wgs_ftp_url(wgs_set, status, format): + base_url = WGS_FTP_BASE + '/' + status + '/' + wgs_set[:2].lower() + '/' + wgs_set + return base_url + get_wgs_file_ext(format) + +def get_nonversioned_wgs_ftp_url(wgs_set, status, format): + ftp_url = 'ftp.ebi.ac.uk' + base_dir = WGS_FTP_DIR + '/' + status + '/' + wgs_set[:2].lower() + base_url = WGS_FTP_BASE + '/' + status + '/' + wgs_set[:2].lower() + ftp = ftplib.FTP(ftp_url) + ftp.login() + ftp.cwd(base_dir) + supp = ftp.nlst() + ftp.close() + files = [f for f in supp if f.startswith(wgs_set) and f.endswith(get_wgs_file_ext(format))] + if len(files) == 0: + return None + else: + return base_url + '/' + max(files) def get_report_from_portal(url): request = urllib2.Request(url) diff --git a/python3/analysisGet.py b/python3/analysisGet.py index b04160d..51a8110 100644 --- a/python3/analysisGet.py +++ b/python3/analysisGet.py @@ -16,7 +16,7 @@ def set_parser(): help='Destination directory (default is current running directory)') parser.add_argument('-m', '--meta', action='store_true', help='Download analysis XML in addition to data files (default is false)') - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser @@ -38,7 +38,7 @@ def set_parser(): try: readGet.download_files(accession, utils.SUBMITTED_FORMAT, dest_dir, False, fetch_meta) - print ('Download completed') + print ('Completed') except Exception: utils.print_error() sys.exit(1) diff --git a/python3/assemblyGet.py b/python3/assemblyGet.py index 75dda9e..e8ddc4e 100644 --- a/python3/assemblyGet.py +++ b/python3/assemblyGet.py @@ -26,7 +26,7 @@ def set_parser(): help='Destination directory (default is current running directory)') parser.add_argument('-w', '--wgs', action='store_true', help='Download WGS set if available (default is false)') - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def check_format(format): @@ -133,7 +133,7 @@ def download_assembly(dest_dir, accession, format, fetch_wgs, quiet=False): try: download_assembly(dest_dir, accession, format, fetch_wgs) - print ('Download completed') + print ('Completed') except Exception: utils.print_error() sys.exit(1) diff --git a/python3/enaDataGet.py b/python3/enaDataGet.py index caf095f..e9c47bf 100644 --- a/python3/enaDataGet.py +++ b/python3/enaDataGet.py @@ -31,7 +31,7 @@ def set_parser(): parser.add_argument('-i', '--index', action='store_true', help="""Download CRAM index files with submitted CRAM files, if any (default is false). This flag is ignored for fastq and sra format options. """) - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser @@ -73,7 +73,7 @@ def set_parser(): else: print ('Error: Invalid accession provided') sys.exit(1) - print ('Download completed') + print ('Completed') except Exception: utils.print_error() sys.exit(1) diff --git a/python3/enaGroupGet.py b/python3/enaGroupGet.py index 6df2f21..ad7a5a9 100644 --- a/python3/enaGroupGet.py +++ b/python3/enaGroupGet.py @@ -32,7 +32,7 @@ def set_parser(): parser.add_argument('-i', '--index', action='store_true', help="""Download CRAM index files with submitted CRAM files, if any (default is false). This flag is ignored for fastq and sra format options. """) - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def download_report(group, result, accession, temp_file): @@ -146,7 +146,7 @@ def download_group(accession, group, format, dest_dir, fetch_wgs, fetch_meta, fe try: download_group(accession, group, format, dest_dir, fetch_wgs, fetch_meta, fetch_index) - print ('Download completed') + print ('Completed') except Exception: utils.print_error() sys.exit(1) diff --git a/python3/readGet.py b/python3/readGet.py index 8b5c586..0ba39a3 100644 --- a/python3/readGet.py +++ b/python3/readGet.py @@ -21,7 +21,7 @@ def set_parser(): parser.add_argument('-i', '--index', action='store_true', help="""Download CRAM index files with submitted CRAM files, if any (default is false). This flag is ignored for fastq and sra format options""") - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def check_read_format(format): @@ -132,7 +132,7 @@ def download_files(accession, format, dest_dir, fetch_index, fetch_meta): try: download_files(accession, format, dest_dir, fetch_index, fetch_meta) - print ('Download completed') + print ('Completed') except Exception: utils.print_error() sys.exit(1) diff --git a/python3/sequenceGet.py b/python3/sequenceGet.py index d90e7d3..8e410b1 100644 --- a/python3/sequenceGet.py +++ b/python3/sequenceGet.py @@ -11,11 +11,11 @@ def set_parser(): parser = argparse.ArgumentParser(prog='sequenceGet', description='Download sequence data for a given INSDC accession') parser.add_argument('accession', help='INSDC sequence/coding accession or WGS prefix (LLLLVV) to fetch') - parser.add_argument('-f', '--format', default='embl', choices=['embl', 'fasta'], - help='File format required (default is embl)') + parser.add_argument('-f', '--format', default='embl', choices=['embl', 'fasta', 'master'], + help='File format required (default is embl); master format only available for WGS') parser.add_argument('-d', '--dest', default='.', help='Destination directory (default is current running directory)') - parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.0') + parser.add_argument('-v', '--version', action='version', version='%(prog)s 1.1') return parser def append_record(dest_file, accession, format): @@ -30,10 +30,17 @@ def download_sequence(dest_dir, accession, format): print ('Unable to fetch file for ' + accession + ', format ' + format) def download_wgs(dest_dir, accession, format): + if utils.is_unversioned_wgs_set(accession): + return download_unversioned_wgs(dest_dir, accession, format) + else: + return download_versioned_wgs(dest_dir, accession, format) + +def download_versioned_wgs(dest_dir, accession, format): + prefix = accession[:6] if format is None: format = utils.EMBL_FORMAT - public_set_url = utils.get_wgs_ftp_url(accession, utils.PUBLIC, format) - supp_set_url = utils.get_wgs_ftp_url(accession, utils.SUPPRESSED, format) + public_set_url = utils.get_wgs_ftp_url(prefix, utils.PUBLIC, format) + supp_set_url = utils.get_wgs_ftp_url(prefix, utils.SUPPRESSED, format) success = utils.get_ftp_file(public_set_url, dest_dir) if not success: success = utils.get_ftp_file(supp_set_url, dest_dir) @@ -41,9 +48,25 @@ def download_wgs(dest_dir, accession, format): print ('No WGS set file available for ' + accession + ', format ' + format) print ('Please contact ENA (datasubs@ebi.ac.uk) if you feel this set should be available') +def download_unversioned_wgs(dest_dir, accession, format): + prefix = accession[:4] + if format is None: + format = utils.EMBL_FORMAT + public_set_url = utils.get_nonversioned_wgs_ftp_url(prefix, utils.PUBLIC, format) + if public_set_url is not None: + utils.get_ftp_file(public_set_url, dest_dir) + else: + supp_set_url = utils.get_nonversion_supp_wgs_ftp_url(prefix, format) + if supp_set_url is not None: + utils.get_ftp_file(supp_set_url, dest_dir) + else: + print ('No WGS set file available for ' + accession + ', format ' + format) + print ('Please contact ENA (datasubs@ebi.ac.uk) if you feel this set should be available') + def check_format(format): - if format not in [utils.EMBL_FORMAT, utils.FASTA_FORMAT]: - print ('Please select a valid format for this accession: ', [utils.EMBL_FORMAT, utils.FASTA_FORMAT]) + allowed_formats = [utils.EMBL_FORMAT, utils.FASTA_FORMAT, utils.MASTER_FORMAT] + if format not in allowed_formats: + print ('Please select a valid format for this accession: ', allowed_formats) sys.exit(1) if __name__ == '__main__': @@ -55,17 +78,20 @@ def check_format(format): dest_dir = args.dest try: - if utils.is_sequence(accession) or utils.is_coding(accession): + if utils.is_wgs_set(accession): + download_wgs(dest_dir, accession, format) + elif utils.is_sequence(accession) or utils.is_coding(accession): if not utils.is_available(accession): print ('Record does not exist or is not available for accession provided') sys.exit(1) + if format == utils.MASTER_FORMAT: + print ('Invalid format. master format only available for WGS sets') + sys.exit(1) download_sequence(dest_dir, accession, format) - elif utils.is_wgs_set(accession): - download_wgs(dest_dir, accession, format) else: - print ('Error: Invalid accession. A sequence or coding accession or a WGS set prefix (LLLLVV) must be provided') + print ('Error: Invalid accession. A sequence or coding accession or a WGS set (prefix or master accession) must be provided') sys.exit(1) - print ('Download completed') + print ('Completed') except Exception: utils.print_error() sys.exit(1) diff --git a/python3/utils.py b/python3/utils.py index 723c44b..56d62e3 100644 --- a/python3/utils.py +++ b/python3/utils.py @@ -3,6 +3,7 @@ # import base64 +import ftplib import re import os import ssl @@ -56,6 +57,7 @@ SEQUENCE_RELEASE_ID='sequence_release' WGS_FTP_BASE = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/wgs' +WGS_FTP_DIR = 'pub/databases/ena/wgs' PORTAL_SEARCH_BASE = 'http://www.ebi.ac.uk/ena/portal/api/search?' RUN_RESULT = 'result=read_run' @@ -79,6 +81,9 @@ sequence_pattern_3 = re.compile('^[A-Z]{4}[0-9]{8,9}(\.[0-9]+)?$') coding_pattern = re.compile('^[A-Z]{3}[0-9]{5}(\.[0-9]+)?$') wgs_prefix_pattern = re.compile('^[A-Z]{4}[0-9]{2}$') +wgs_master_pattern = re.compile('^[A-Z]{4}[0-9]{2}[0]{6}$') +unversion_wgs_prefix_pattern = re.compile('^[A-Z]{4}$') +unversion_wgs_master_pattern = re.compile('^[A-Z]{4}[0]{8}$') run_pattern = re.compile('^[EDS]RR[0-9]{6,7}$') experiment_pattern = re.compile('^[EDS]RX[0-9]{6,7}$') analysis_pattern = re.compile('^[EDS]RZ[0-9]{6,7}$') @@ -97,7 +102,14 @@ def is_coding(accession): return coding_pattern.match(accession) def is_wgs_set(accession): - return wgs_prefix_pattern.match(accession) + return wgs_prefix_pattern.match(accession) \ + or wgs_master_pattern.match(accession) \ + or unversion_wgs_prefix_pattern.match(accession) \ + or unversion_wgs_master_pattern.match(accession) + +def is_unversioned_wgs_set(accession): + return unversion_wgs_prefix_pattern.match(accession) \ + or unversion_wgs_master_pattern.match(accession) def is_run(accession): return run_pattern.match(accession) @@ -248,14 +260,32 @@ def get_ftp_file_with_md5_check(ftp_url, dest_dir, md5): except Exception: return False -def get_wgs_ftp_url(wgs_set, status, format): - base_url = WGS_FTP_BASE + '/' + status + '/' + wgs_set[:2].lower() + '/' + wgs_set +def get_wgs_file_ext(format): if format == EMBL_FORMAT: - return base_url + WGS_EMBL_EXT + return WGS_EMBL_EXT elif format == FASTA_FORMAT: - return base_url + WGS_FASTA_EXT + return WGS_FASTA_EXT elif format == MASTER_FORMAT: - return base_url + WGS_MASTER_EXT + return WGS_MASTER_EXT + +def get_wgs_ftp_url(wgs_set, status, format): + base_url = WGS_FTP_BASE + '/' + status + '/' + wgs_set[:2].lower() + '/' + wgs_set + return base_url + get_wgs_file_ext(format) + +def get_nonversioned_wgs_ftp_url(wgs_set, status, format): + ftp_url = 'ftp.ebi.ac.uk' + base_dir = WGS_FTP_DIR + '/' + status + '/' + wgs_set[:2].lower() + base_url = WGS_FTP_BASE + '/' + status + '/' + wgs_set[:2].lower() + ftp = ftplib.FTP(ftp_url) + ftp.login() + ftp.cwd(base_dir) + supp = ftp.nlst() + ftp.close() + files = [f for f in supp if f.startswith(wgs_set) and f.endswith(get_wgs_file_ext(format))] + if len(files) == 0: + return None + else: + return base_url + '/' + max(files) def get_report_from_portal(url): userAndPass = base64.b64encode(ANON_AUTH).decode("ascii")