Skip to content

Commit

Permalink
add metadata to proksee and add test accordingly
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanMainguy committed Nov 9, 2023
1 parent 28bc752 commit 7020706
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 15 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ jobs:
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db2 -m metadata/metadata_genomes.tsv -a genomes
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db3 -m metadata/metadata_families.tsv -a families --omit
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db4 -m metadata/metadata_rgps.tsv -a RGPs
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db5 -m metadata/metadata_contigs.tsv -a contigs
ppanggolin metadata -p mybasicpangenome/pangenome.h5 -s db6 -m metadata/metadata_modules.tsv -a modules
ppanggolin write_pangenome -p mybasicpangenome/pangenome.h5 --output mybasicpangenome -f --gexf --light_gexf --cpu 1
ppanggolin rgp_cluster --pangenome mybasicpangenome/pangenome.h5 -o rgp_cluster_with_metadata --graph_formats graphml
Expand Down Expand Up @@ -165,5 +169,6 @@ jobs:
-f --gff --add_metadata --metadata_sep §
# The pipe separator is found in metadata source db1. If we don't require this source, then writing with the pipe separator works fine.
ppanggolin write_genomes -p mybasicpangenome/pangenome.h5 --output mybasicpangenome/genomes_outputs_with_metadata \
-f --gff --add_metadata --metadata_sources db2 db3 db4
ppanggolin write_genomes -p mybasicpangenome/pangenome.h5 --output mybasicpangenome/genomes_outputs_with_metadata -f --gff --proksee --add_metadata --metadata_sources db2 db3 db4
28 changes: 15 additions & 13 deletions ppanggolin/formats/write_proksee.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# local libraries
from ppanggolin.genome import Organism, Gene
from ppanggolin.region import Module, Region
from ppanggolin.pangenome import Pangenome



Expand Down Expand Up @@ -100,12 +99,12 @@ def write_tracks(features: List[str]):
return tracks


def initiate_proksee_data(features: List[str], org_name: str, module_to_color: Dict[Module, str]):
def initiate_proksee_data(features: List[str], organism: Organism, module_to_color: Dict[Module, str]):
"""
Initializes ProkSee data structure with legends, tracks, and captions.
:param features: A list of features to include in the ProkSee data.
:param org_name: The name of the organism for which the ProkSee data is being generated.
:param organism: The organism for which the ProkSee data is being generated.
:param module_to_color: A dictionary mapping modules to their assigned colors.
:return: ProkSee data structure containing legends, tracks, and captions.
Expand All @@ -114,7 +113,7 @@ def initiate_proksee_data(features: List[str], org_name: str, module_to_color: D
proksee_tracks = write_tracks(features)

proksee_captions = {
"name": f"{org_name} annotated with PPanGGOLiN",
"name": f"{organism.name} annotated with PPanGGOLiN",
"position": "bottom-center",
"font": "sans-serif,plain,18",
"backgroundColor": "rgba(255,255,255,0.4)"
Expand All @@ -128,6 +127,7 @@ def initiate_proksee_data(features: List[str], org_name: str, module_to_color: D
"tracks": proksee_tracks,
"sequence": {},
'captions': [proksee_captions],
"meta":organism.formatted_metadata_dict() # metadata
}

return {"cgview": cgview_data}
Expand All @@ -149,6 +149,7 @@ def write_contig(organism: Organism, genome_sequences: Dict[str, str] = None) ->
"name": contig.name,
"length": contig.length,
"orientation": "+",
"meta":contig.formatted_metadata_dict()
}

if genome_sequences:
Expand Down Expand Up @@ -176,8 +177,10 @@ def write_genes(organism: Organism, disable_bar: bool = True) -> Tuple[List[Dict
for gene in tqdm(organism.genes, total=organism.number_of_genes(), unit="genes", disable=disable_bar):
gf = gene.family
gf2gene[gf.name].append(gene)

genes_data_list.append({

metadata_for_proksee = {f"gene_{k}":v for k, v in gene.formatted_metadata_dict().items()}
metadata_for_proksee.update({f"family_{k}":v for k, v in gene.family.formatted_metadata_dict().items()})
genes_data_list.append({
"name": gene.name,
"type": "Gene",
"contig": gene.contig.name,
Expand All @@ -188,7 +191,7 @@ def write_genes(organism: Organism, disable_bar: bool = True) -> Tuple[List[Dict
"tags": [gene.family.named_partition, gene.family.name],
"source": "Gene",
"legend": gene.family.named_partition,
"meta": "" # annotations
"meta": metadata_for_proksee
})

# Process RNA genes
Expand All @@ -204,7 +207,7 @@ def write_genes(organism: Organism, disable_bar: bool = True) -> Tuple[List[Dict
"tags": [],
"source": "Gene",
"legend": "RNA",
"meta": "" # annotations
"meta": gene.formatted_metadata_dict()
})

return genes_data_list, gf2gene
Expand Down Expand Up @@ -237,7 +240,8 @@ def write_rgp(rgps: Region, organism: Organism, rgp_to_spot_id:Dict[Region,int]=
"stop": rgp.stop,
"legend": "RGP",
"source": "RGP",
"tags": tags
"tags": tags,
"meta": rgp.formatted_metadata_dict()
})

return rgp_data_list
Expand Down Expand Up @@ -275,9 +279,7 @@ def write_modules(modules: List[Module], organism: Organism, gf2genes: Dict[str,
"legend": f"module_{module.ID}",
"source": "Module",
"tags": [f'{completion}% complete'],
"meta": {
"completion": f"{completion}%"
}
"meta": module.formatted_metadata_dict()
})

return modules_data_list
Expand All @@ -301,7 +303,7 @@ def write_proksee_organism(organism: Organism, output_file: Path,
This function writes ProkSee data for a given organism, including contig information, genes colored by partition, RGPs,
and modules. The resulting data is saved as a JSON file in the specified output file.
"""
proksee_data = initiate_proksee_data(features, organism.name, module_to_colors)
proksee_data = initiate_proksee_data(features, organism, module_to_colors)

proksee_data["cgview"]["sequence"]["contigs"] = write_contig(organism, genome_sequences)

Expand Down
2 changes: 2 additions & 0 deletions testingDataset/metadata/metadata_contigs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
contigs Note
NZ_CP017732.1 Circular Chromosome
4 changes: 4 additions & 0 deletions testingDataset/metadata/metadata_modules.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
modules featureC featureD scoreE
module_0 0 mz_1 .1
module_1 30 mz_12 .3
module_2 10 mz_899 .1

0 comments on commit 7020706

Please sign in to comment.