Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed some issues in download module #354

Merged
merged 5 commits on Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion relecov_tools/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
stderr=True, force_terminal=relecov_tools.utils.rich_force_colors()
)

__version__ = "1.3.0"


def run_relecov_tools():
# Set up the rich traceback
Expand Down Expand Up @@ -64,7 +66,6 @@ def run_relecov_tools():
)

# stderr.print("[green] `._,._,'\n", highlight=False)
__version__ = "1.3.0"
stderr.print(
"\n" "[grey39] RELECOV-tools version {}".format(__version__), highlight=False
)
Expand Down Expand Up @@ -135,6 +136,7 @@ def relecov_tools_cli(verbose, log_file):
)
)
log.addHandler(log_fh)
log.info(f"RELECOV-tools version {__version__}")


# sftp
Expand Down
54 changes: 30 additions & 24 deletions relecov_tools/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,23 +1127,9 @@ def download(self, target_folders):
if saved_files:
successful_files.extend(saved_files)
if corrupted:
corr_fold = os.path.join(local_folder, "corrupted")
os.mkdir(corr_fold)
error_text = "Found corrupted files: %s. Moved to: %s"
stderr.print(f"[red]{error_text % (str(corrupted), corr_fold)}")
self.include_warning(error_text % (str(corrupted), corr_fold))
for corr_file in corrupted:
path = os.path.join(local_folder, corr_file)
try:
os.rename(path, os.path.join(corr_fold, corr_file))
except (FileNotFoundError, PermissionError, OSError) as e:
error_text = (
"Could not move corrupted file %s to %s: %s"
)
log.error(error_text % (path, corr_fold, e))
stderr.print(
f"[red]{error_text % (path, corr_fold, e)}"
)
error_text = "Found corrupted files: %s. Removed"
stderr.print(f"[red]{error_text % (str(corrupted))}")
self.include_warning(error_text % (str(corrupted)))
if self.abort_if_md5_mismatch:
error_text = "Stop processing %s due to corrupted files."
stderr.print(f"[red]{error_text % folder}")
Expand Down Expand Up @@ -1181,16 +1167,40 @@ def download(self, target_folders):
else:
clean_fetchlist = seqs_fetchlist
clean_pathlist = [os.path.join(local_folder, fi) for fi in clean_fetchlist]

for file in clean_fetchlist:
full_f_path = os.path.join(local_folder, file)
if not relecov_tools.utils.check_gzip_integrity(full_f_path):
corrupted.append(file)

for sample_id, files in list(valid_filedict.items()):
if any(
files.get(key) in corrupted
for key in ["sequence_file_R1_fastq", "sequence_file_R2_fastq"]
):
for file_name in files.values():
path = os.path.join(local_folder, file_name)
try:
os.remove(path)
log.info(
"File %s was removed because it was corrupted",
file_name,
)
except (FileNotFoundError, PermissionError, OSError) as e:
error_text = "Could not remove corrupted file %s: %s"
log.error(error_text % (path, e))
stderr.print(f"[red]{error_text % (path, e)}")

not_md5sum = []
if remote_md5sum:
# Get hashes from provided md5sum, create them for those not provided
files_md5_dict = {}
for path in clean_pathlist:
f_name = os.path.basename(path)
if f_name in successful_files:
files_md5_dict[f_name] = hash_dict[f_name]
elif f_name in corrupted:
if f_name in corrupted:
clean_fetchlist.remove(f_name)
elif f_name in successful_files:
files_md5_dict[f_name] = hash_dict[f_name]
else:
if not str(f_name).rstrip(".gz") in files_to_compress:
error_text = "File %s not found in md5sum. Creating hash"
Expand All @@ -1204,10 +1214,6 @@ def download(self, target_folders):
relecov_tools.utils.calculate_md5(path) for path in clean_pathlist
]
files_md5_dict = dict(zip(clean_fetchlist, md5_hashes))
for file in files_md5_dict.keys():
full_f_path = os.path.join(local_folder, file)
if not relecov_tools.utils.check_gzip_integrity(full_f_path):
corrupted.append(file)
files_md5_dict = {
x: y for x, y in files_md5_dict.items() if x not in corrupted
}
Expand Down
Loading