Skip to content

Commit

Permalink
Integrate ModelPolisher in CMPB #8
Browse files Browse the repository at this point in the history
  • Loading branch information
niinina committed Nov 29, 2024
1 parent 225d207 commit 8f0efab
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 11 deletions.
52 changes: 41 additions & 11 deletions src/specimen/cmpb/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import logging
import pandas as pd
from datetime import date
import model_polisher as mp
from pathlib import Path
from typing import Union

Expand Down Expand Up @@ -140,6 +141,7 @@ def between_analysis(model: Model, cfg:dict, step:str):
Path(dir,"cmpb_out",'misc', 'gapfill').mkdir(parents=True, exist_ok=False) # |- gapfill
Path(dir,"cmpb_out",'misc', 'growth').mkdir(parents=True, exist_ok=False) # |- growth
Path(dir,"cmpb_out",'misc', 'stats').mkdir(parents=True, exist_ok=False) # |- stats
Path(dir,"cmpb_out",'misc', 'modelpolisher').mkdir(parents=True, exist_ok=False) # |- modelpolisher
Path(dir,"cmpb_out",'misc', 'kegg_pathway').mkdir(parents=True, exist_ok=False) # |- kegg_pathway
Path(dir,"cmpb_out",'misc', 'auxotrophy').mkdir(parents=True, exist_ok=False) # |- auxotrophy

Expand Down Expand Up @@ -216,7 +218,8 @@ def between_analysis(model: Model, cfg:dict, step:str):
current_modelpath = Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_after_KEGG_gapfill.xml')
else:
write_model_to_file(current_libmodel, str(only_modelpath))
current_model = load_model(current_modelpath,'cobra')
current_modelpath = only_modelpath
current_model = load_model(str(current_modelpath),'cobra')
else:
mes = f'No KEGG organism ID provided. Gapfilling with KEGG will be skipped.'
raise warnings.warn(mes,UserWarning)
Expand Down Expand Up @@ -244,7 +247,8 @@ def between_analysis(model: Model, cfg:dict, step:str):
current_modelpath = Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_after_BioCyc_gapfill.xml')
else:
write_model_to_file(current_libmodel, str(only_modelpath))
current_model = load_model(current_modelpath,'cobra')
current_modelpath = only_modelpath
current_model = load_model(str(current_modelpath),'cobra')

# GeneGapFiller
if config['gapfilling']['GeneGapFiller']:
Expand Down Expand Up @@ -275,10 +279,11 @@ def between_analysis(model: Model, cfg:dict, step:str):
if config['general']['save_all_models']:
write_model_to_file(current_libmodel, str(Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_after_Gene_gapfill.xml')))
current_modelpath = Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_after_Gene_gapfill.xml')
current_model = load_model(current_modelpath,'cobra')
else:
write_model_to_file(current_libmodel, str(only_modelpath))
current_model = load_model(current_modelpath,'cobra')
current_modelpath = only_modelpath

current_model = load_model(str(current_modelpath),'cobra')

# testing
if run_gapfill:
Expand All @@ -287,10 +292,32 @@ def between_analysis(model: Model, cfg:dict, step:str):

# ModelPolisher
###############
# @TODO
# future update
# currently being revamped
# and python access is coming soon
if config['modelpolisher']:
config_mp = {"allow-model-to-be-saved-on-server": config["mp"]["allow-model-to-be-saved-on-server"],
"fixing": {"dont-fix": config["mp"]["fixing"]["dont-fix"]},
"annotation": {"bigg": {"annotate-with-bigg": config["mp"]["annotation"]["bigg"]["annotate-with-bigg"],
"include-any-uri": config["mp"]["annotation"]["bigg"]["include-any-uri"]}}}

result = mp.polish_model_file(current_modelpath, config_mp)

# @DISCUSSION Should the run-id be saved somewhere for debugging purposes? result['run_id']
pd.DataFrame(result['diff']).to_csv(Path(dir,'cmpb_out','misc','modelpolisher','diff_mp.csv'), sep=';', header=False)
pd.DataFrame(result['pre_validation']).to_csv(Path(dir,'cmpb_out','misc','modelpolisher','pre_validation.csv'), sep=';', header=True)
pd.DataFrame(result['post_validation']).to_csv(Path(dir,'cmpb_out','misc','modelpolisher','post_validation.csv'), sep=';', header=True)

# save model
if config['general']['save_all_models']:
write_model_to_file(result["polished_document"].getModel(), str(Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_after_ModelPolisher.xml')))
current_modelpath = Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_after_ModelPolisher.xml')
else:
write_model_to_file(result["polished_document"].getModel(), str(only_modelpath))
current_modelpath = only_modelpath

current_model = load_model(str(current_modelpath),'cobra')

# in-between testing
between_growth_test(current_model,config,step='after_ModelPolisher')
between_analysis(current_model, config, step='after_ModelPolisher')

# Annotations
#############
Expand All @@ -307,7 +334,8 @@ def between_analysis(model: Model, cfg:dict, step:str):
write_model_to_file(current_libmodel, str(Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_added_KeggPathwayGroups.xml')))
current_modelpath = Path(dir,'cmpb_out','models',f'{current_libmodel.getId()}_added_KeggPathwayGroups.xml')
else:
write_model_to_file(current_libmodel, str(current_modelpath))
write_model_to_file(current_libmodel, str(only_modelpath))
current_modelpath = only_modelpath

# SBOannotator
# ------------
Expand All @@ -319,7 +347,8 @@ def between_analysis(model: Model, cfg:dict, step:str):
current_modelpath = Path(dir,'cmpb_out','models', f'{current_libmodel.getId()}_SBOannotated.xml')
else:
current_libmodel = run_SBOannotator(current_libmodel)
write_model_to_file(current_libmodel, str(current_modelpath))
write_model_to_file(current_libmodel, str(only_modelpath))
current_modelpath = only_modelpath

current_model = load_model(str(current_modelpath),'cobra')
between_analysis(current_model,config,step='after_annotation')
Expand Down Expand Up @@ -413,7 +442,8 @@ def between_analysis(model: Model, cfg:dict, step:str):
write_model_to_file(current_model, str(Path(dir,'cmpb_out','models',f'{current_model.id}_after_BOF.xml')))
current_modelpath = Path(dir,'cmpb_out','models',f'{current_model.id}_after_BOF.xml')
else:
write_model_to_file(current_model, str(current_modelpath))
write_model_to_file(current_model, str(only_modelpath))
current_modelpath = only_modelpath

# MCC
# ---
Expand Down
11 changes: 11 additions & 0 deletions src/specimen/data/config/cmpb_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,17 @@ gapfilling:
percentage identity: 90.0 # Percentage identity threshold value for accepting
# matches found by DIAMOND as homologous.

# Polish the model using ModelPolisher
# ------------------------------------
modelpolisher: True # Activate ModelPolisher
mp:
allow-model-to-be-saved-on-server: False # Enable saving the model on the server
fixing:
dont-fix: False # If True, the model is not fixed, i.e. unset mandatory default values are NOT set
annotation:
bigg:
annotate-with-bigg: True # Enable annotation with BiGG
include-any-uri: True # Enable adding annotations that are not MIRIAM-compliant

# Add KEGG pathways as groups, optional
# -------------------------------------
Expand Down
22 changes: 22 additions & 0 deletions src/specimen/util/set_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,28 @@ def save_cmpb_user_input(configpath:Union[str,None]=None) -> dict:
if another_gapfiller:
algorithm = click.prompt('Which algorithm do you want to use for gapfilling?', type=click.Choice(['KEGGapFiller','BioCycGapFiller','GeneGapFiller']), show_choices=True)

# ModelPolisher
modelpolisher = click.prompt('Do you want to run ModelPolisher?', type=click.Choice(['y','n']), show_choices=True)
match modelpolisher:
case 'y':
config['modelpolisher'] = True
allow_model_to_be_saved_on_server = click.prompt('Do you want to allow the model to be saved on the server?', type=click.Choice(['y','n']), show_choices=True)
allow_model_to_be_saved_on_server = True if allow_model_to_be_saved_on_server == 'y' else False
config['mp']['allow-model-to-be-saved-on-server'] = allow_model_to_be_saved_on_server

dont_fix = click.prompt('Do you want to fix the model? Unset default values will be set, if they are mandatory.', type=click.Choice(['y','n']), show_choices=True)
dont_fix = False if dont_fix == 'y' else True
config['mp']['fixing']['dont-fix'] = dont_fix

annotate_with_bigg = click.prompt('Do you want to annotate with BiGG?', type=click.Choice(['y','n']), show_choices=True)
annotate_with_bigg = True if annotate_with_bigg == 'y' else False
config['mp']['annotation']['bigg']['annotate-with-bigg'] = annotate_with_bigg
include_any_uri = click.prompt('Do you want to include annotation that are not MIRIAM-compliant?', type=click.Choice(['y','n']), show_choices=True)
include_any_uri = True if include_any_uri == 'y' else False
config['mp']['annotation']['bigg']['include-any-uri'] = include_any_uri
case 'n':
config['modelpolisher'] = False

# kegg pathways as groups
kegg_pw_groups = click.prompt('Do you want to add KEGG pathways as groups to the model?', type=click.Choice(['y','n']), show_choices=True)
match kegg_pw_groups:
Expand Down

0 comments on commit 8f0efab

Please sign in to comment.