From d3e24c44b1510899b6be7bf0fb4687a73d4edd59 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 2 Aug 2024 13:03:29 +0200 Subject: [PATCH 1/2] fix default output when specified in config in wf cmds --- ppanggolin/utils.py | 12 ++++++++---- ppanggolin/workflow/all.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ppanggolin/utils.py b/ppanggolin/utils.py index b9e66fd3..a52f7039 100755 --- a/ppanggolin/utils.py +++ b/ppanggolin/utils.py @@ -276,7 +276,7 @@ def read_compressed_or_not(file_or_file_path: Union[Path, BinaryIO, TextIOWrappe return file_or_file_path -def write_compressed_or_not(file_path: Path, compress: bool = False) -> Union[gzip.GzipFile, TextIO]: +def write_compressed_or_not(file_path: Path, compress: bool = False) -> Union[gzip.GzipFile, TextIOWrapper]: """ Create a file-like object, compressed or not. @@ -736,14 +736,18 @@ def manage_cli_and_config_args(subcommand: str, config_file: str, subcommand_to_ # overwrite write and draw default when not specified in config if workflow_step == 'write_pangenome': for out_flag in WRITE_PAN_FLAG_DEFAULT_IN_WF: - setattr(default_step_args, out_flag, True) + if out_flag not in config[workflow_step]: + setattr(default_step_args, out_flag, True) + if workflow_step == 'write_genomes': for out_flag in WRITE_GENOME_FLAG_DEFAULT_IN_WF: - setattr(default_step_args, out_flag, True) + if out_flag not in config[workflow_step]: + setattr(default_step_args, out_flag, True) if workflow_step == "draw": for out_flag in DRAW_FLAG_DEFAULT_IN_WF: - setattr(default_step_args, out_flag, True) + if out_flag not in config[workflow_step]: + setattr(default_step_args, out_flag, True) step_args = overwrite_args(default_step_args, config_step_args, cli_args) diff --git a/ppanggolin/workflow/all.py b/ppanggolin/workflow/all.py index e884f0bf..2e3bce12 100644 --- a/ppanggolin/workflow/all.py +++ b/ppanggolin/workflow/all.py @@ -209,7 +209,7 @@ def launch_workflow(args: argparse.Namespace, panrgp: bool = True, start_desc = time.time() - write_pangenome_arguments = ["csv", "Rtab", "gexf", "light_gexf", "projection", "stats", 'json', "families_tsv"] + write_pangenome_arguments = ["gexf", "light_gexf", 'json', "csv", "Rtab", "stats", "partitions", "families_tsv"] # Check that we don't ask write to output something not computed. borders, spots, spot_modules, modules, regions = (False, False, False, False, False) From 5957ba5d95ddcaf28ff0b93e0fb39e5ba08c7eb8 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 2 Aug 2024 13:04:24 +0200 Subject: [PATCH 2/2] add possibility to compress genomes_statistics.tsv output --- ppanggolin/formats/writeFlatPangenome.py | 7 ++++--- ppanggolin/projection/projection.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ppanggolin/formats/writeFlatPangenome.py b/ppanggolin/formats/writeFlatPangenome.py index 5f49d807..48c245c2 100644 --- a/ppanggolin/formats/writeFlatPangenome.py +++ b/ppanggolin/formats/writeFlatPangenome.py @@ -636,7 +636,7 @@ def write_persistent_duplication_statistics(pangenome: Pangenome, output: Path, return single_copy_persistent def write_summaries_in_tsv(summaries: List[Dict[str, Any]], output_file: Path, - dup_margin:float, soft_core:float): + dup_margin:float, soft_core:float, compress:bool = False): """ Writes summaries of organisms stored in a dictionary into a Tab-Separated Values (TSV) file. @@ -644,6 +644,7 @@ def write_summaries_in_tsv(summaries: List[Dict[str, Any]], output_file: Path, :param output_file: The Path specifying the output TSV file location. :param soft_core: Soft core threshold used :param dup_margin: minimum ratio of organisms in which family must have multiple genes to be considered duplicated + :param compress: Compress the file in .gz """ # Flatten the nested dictionaries within the summaries dictionary flat_summaries = [flatten_nested_dict(summary_info) for summary_info in summaries] @@ -651,7 +652,7 @@ def write_summaries_in_tsv(summaries: List[Dict[str, Any]], output_file: Path, # Create a DataFrame from the flattened summaries df_summary = pd.DataFrame(flat_summaries) - with open(output_file, "w") as flout: + with write_compressed_or_not(output_file, compress) as flout: flout.write(f"#soft_core={round(soft_core, 3)}\n") flout.write(f"#duplication_margin={round(dup_margin, 3)}\n") @@ -702,7 +703,7 @@ def write_stats(output: Path, soft_core: float = 0.95, dup_margin: float = 0.05, summaries.append(organism_summary) - write_summaries_in_tsv(summaries, output_file= output / "genomes_statistics.tsv", dup_margin=dup_margin, soft_core=soft_core) + write_summaries_in_tsv(summaries, output_file= output / "genomes_statistics.tsv", dup_margin=dup_margin, soft_core=soft_core, compress=compress) logging.getLogger("PPanGGOLiN").info("Done writing genome per genome statistics") diff --git a/ppanggolin/projection/projection.py b/ppanggolin/projection/projection.py index d731e92c..11e4bc2a 100644 --- a/ppanggolin/projection/projection.py +++ b/ppanggolin/projection/projection.py @@ -330,7 +330,7 @@ def write_projection_results(pangenome: Pangenome, organisms: Set[Organism], write_summaries_in_tsv(summaries, output_file=output_file, dup_margin=dup_margin, - soft_core=soft_core) + soft_core=soft_core, compress=compress) def summarize_projected_genome(organism: Organism,