Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix config outputs in workflow commands when set in config file #261

Merged
merged 2 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ppanggolin/formats/writeFlatPangenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,22 +636,23 @@ def write_persistent_duplication_statistics(pangenome: Pangenome, output: Path,
return single_copy_persistent

def write_summaries_in_tsv(summaries: List[Dict[str, Any]], output_file: Path,
dup_margin:float, soft_core:float):
dup_margin:float, soft_core:float, compress:bool = False):
"""
Writes summaries of organisms stored in a dictionary into a Tab-Separated Values (TSV) file.

:param summaries: A list containing organism summaries.
:param output_file: The Path specifying the output TSV file location.
:param soft_core: Soft core threshold used
:param dup_margin: minimum ratio of organisms in which family must have multiple genes to be considered duplicated
:param compress: Compress the file in .gz
"""
# Flatten the nested dictionaries within the summaries dictionary
flat_summaries = [flatten_nested_dict(summary_info) for summary_info in summaries]

# Create a DataFrame from the flattened summaries
df_summary = pd.DataFrame(flat_summaries)

with open(output_file, "w") as flout:
with write_compressed_or_not(output_file, compress) as flout:
flout.write(f"#soft_core={round(soft_core, 3)}\n")
flout.write(f"#duplication_margin={round(dup_margin, 3)}\n")

Expand Down Expand Up @@ -702,7 +703,7 @@ def write_stats(output: Path, soft_core: float = 0.95, dup_margin: float = 0.05,

summaries.append(organism_summary)

write_summaries_in_tsv(summaries, output_file= output / "genomes_statistics.tsv", dup_margin=dup_margin, soft_core=soft_core)
write_summaries_in_tsv(summaries, output_file= output / "genomes_statistics.tsv", dup_margin=dup_margin, soft_core=soft_core, compress=compress)

logging.getLogger("PPanGGOLiN").info("Done writing genome per genome statistics")

Expand Down
2 changes: 1 addition & 1 deletion ppanggolin/projection/projection.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def write_projection_results(pangenome: Pangenome, organisms: Set[Organism],
write_summaries_in_tsv(summaries,
output_file=output_file,
dup_margin=dup_margin,
soft_core=soft_core)
soft_core=soft_core, compress=compress)


def summarize_projected_genome(organism: Organism,
Expand Down
12 changes: 8 additions & 4 deletions ppanggolin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def read_compressed_or_not(file_or_file_path: Union[Path, BinaryIO, TextIOWrappe
return file_or_file_path


def write_compressed_or_not(file_path: Path, compress: bool = False) -> Union[gzip.GzipFile, TextIO]:
def write_compressed_or_not(file_path: Path, compress: bool = False) -> Union[gzip.GzipFile, TextIOWrapper]:
"""
Create a file-like object, compressed or not.

Expand Down Expand Up @@ -736,14 +736,18 @@ def manage_cli_and_config_args(subcommand: str, config_file: str, subcommand_to_
# overwrite write and draw default when not specified in config
if workflow_step == 'write_pangenome':
for out_flag in WRITE_PAN_FLAG_DEFAULT_IN_WF:
setattr(default_step_args, out_flag, True)
if out_flag not in config[workflow_step]:
setattr(default_step_args, out_flag, True)

if workflow_step == 'write_genomes':
for out_flag in WRITE_GENOME_FLAG_DEFAULT_IN_WF:
setattr(default_step_args, out_flag, True)
if out_flag not in config[workflow_step]:
setattr(default_step_args, out_flag, True)

if workflow_step == "draw":
for out_flag in DRAW_FLAG_DEFAULT_IN_WF:
setattr(default_step_args, out_flag, True)
if out_flag not in config[workflow_step]:
setattr(default_step_args, out_flag, True)

step_args = overwrite_args(default_step_args, config_step_args, cli_args)

Expand Down
2 changes: 1 addition & 1 deletion ppanggolin/workflow/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def launch_workflow(args: argparse.Namespace, panrgp: bool = True,

start_desc = time.time()

write_pangenome_arguments = ["csv", "Rtab", "gexf", "light_gexf", "projection", "stats", 'json', "families_tsv"]
write_pangenome_arguments = ["gexf", "light_gexf", 'json', "csv", "Rtab", "stats", "partitions", "families_tsv"]

# Check that we don't ask write to output something not computed.
borders, spots, spot_modules, modules, regions = (False, False, False, False, False)
Expand Down