Skip to content

Commit

Permalink
Merge pull request #25 from rcsb/dev-dwp
Browse files: browse the repository at this point in the history
V1.23: Add support for loading target cofactor data into MongoDB
  • Loading branch information
piehld authored Aug 21, 2024
2 parents f79d637 + fcba4dd commit 4ce0f46
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 11 deletions.
1 change: 1 addition & 0 deletions HISTORY.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,4 @@
9-May-2024 V1.2 Adjust provider type exclusion input to accept a list of types
13-May-2024 V1.21 Update requirements; fix linting
17-Jul-2024 V1.22 Adjust LigandNeighborMappingExtractor
20-Aug-2024 V1.23 Add support for target cofactor data loading
15 changes: 13 additions & 2 deletions rcsb/exdb/cli/ExDbExec.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# 9-Mar-2023 dwp Lower refChunkSize to 10 (UniProt API having trouble streaming XML responses)
# 25-Apr-2024 dwp Add arguments and logic to support CLI usage from weekly-update workflow;
# Add support for logging output to a specific file
# 20-Aug-2024 dwp Add load_target_cofactors operation; change name of upd_targets_cofactors to upd_targets
##
__docformat__ = "google en"
__author__ = "John Westbrook"
Expand Down Expand Up @@ -46,7 +47,8 @@ def main():
"upd_ref_seq", # Update reference sequence assignments
"upd_neighbor_interactions",
"upd_uniprot_taxonomy",
"upd_targets_cofactors",
"upd_targets",
"load_target_cofactors",
"upd_pubchem",
"upd_entry_info",
"upd_glycan_idx",
Expand Down Expand Up @@ -108,7 +110,16 @@ def main():
exWf = ExDbWorkflow(**commonD)
if op in ["etl_chemref", "etl_uniprot_core", "etl_tree_node_lists", "upd_ref_seq"]:
okR = exWf.load(op, **loadD)
elif op in ["upd_neighbor_interactions", "upd_uniprot_taxonomy", "upd_targets_cofactors", "upd_pubchem", "upd_entry_info", "upd_glycan_idx", "upd_resource_stash"]:
elif op in [
"upd_neighbor_interactions",
"upd_uniprot_taxonomy",
"upd_targets",
"load_target_cofactors",
"upd_pubchem",
"upd_entry_info",
"upd_glycan_idx",
"upd_resource_stash",
]:
okR = exWf.buildExdbResource(op, **loadD)
else:
logger.error("Unsupported op %r", op)
Expand Down
2 changes: 1 addition & 1 deletion rcsb/exdb/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
__author__ = "John Westbrook"
__email__ = "[email protected]"
__license__ = "Apache 2.0"
__version__ = "1.22"
__version__ = "1.23"
28 changes: 25 additions & 3 deletions rcsb/exdb/wf/ExDbWorkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# 2-Mar-2023 dwp Add "numProc" parameter to 'upd_ref_seq' operation methods
# 9-Mar-2023 dwp Lower refChunkSize to 10 (UniProt API having trouble streaming XML responses)
# 25-Apr-2024 dwp Add arguments and methods to support CLI usage from weekly-update workflow
# 20-Aug-2024 dwp Add LoadTargetCofactors step; change name of UpdateTargetsCofactors step to UpdateTargetsData
##
__docformat__ = "google en"
__author__ = "John Westbrook"
Expand Down Expand Up @@ -274,7 +275,16 @@ def buildExdbResource(self, op, **kwargs):
logger.info("Starting operation %r\n", op)
#
# argument processing
if op not in ["upd_neighbor_interactions", "upd_uniprot_taxonomy", "upd_targets_cofactors", "upd_pubchem", "upd_entry_info", "upd_glycan_idx", "upd_resource_stash"]:
if op not in [
"upd_neighbor_interactions",
"upd_uniprot_taxonomy",
"upd_targets",
"load_target_cofactors",
"upd_pubchem",
"upd_entry_info",
"upd_glycan_idx",
"upd_resource_stash",
]:
logger.error("Unsupported operation %r - exiting", op)
return False
try:
Expand Down Expand Up @@ -324,8 +334,8 @@ def buildExdbResource(self, op, **kwargs):
ok = ptsWf.updateUniProtTaxonomy() and ok
logger.info("updateUniProtTaxonomy status %r", ok)
#
elif op == "upd_targets_cofactors":
logger.info("Starting UpdateTargetsCofactors")
elif op == "upd_targets":
logger.info("Starting UpdateTargetsData")
ptsWf = ProteinTargetSequenceExecutionWorkflow(
configPath=self.__configPath,
mockTopPath=self.__mockTopPath,
Expand All @@ -352,6 +362,18 @@ def buildExdbResource(self, op, **kwargs):
logger.info("buildCofactorData status %r", ok)
ptsWf.resourceCheck()
#
elif op == "load_target_cofactors":
logger.info("Starting LoadTargetCofactors")
ptsWf = ProteinTargetSequenceExecutionWorkflow(
configPath=self.__configPath,
mockTopPath=self.__mockTopPath,
configName=self.__configName,
cachePath=self.__cachePath,
)
ok = ptsWf.loadTargetCofactorData()
logger.info("loadTargetCofactorData status %r", ok)
ptsWf.resourceCheck()
#
elif op == "upd_pubchem":
# -- Update local chemical indices and create PubChem mapping index ---
logger.info("Starting workflow PubChemEtlWorkflow")
Expand Down
12 changes: 7 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
OpenEye-toolkits >= 2020.2.2
numpy
jsonschema >= 2.6.0
rcsb.utils.io >= 1.46
rcsb.db >= 1.721
rcsb.utils.io >= 1.48
rcsb.db >= 1.723
rcsb.utils.chem >= 0.79
rcsb.utils.chemref >= 0.91
rcsb.utils.citation >= 0.22
rcsb.utils.config >= 0.40
rcsb.utils.ec >= 0.25
rcsb.utils.go >= 0.18
rcsb.utils.seq >= 0.79
rcsb.utils.seq >= 0.82
rcsb.utils.seqalign >= 0.29
rcsb.utils.targets >= 0.80
rcsb.utils.struct >= 0.47
rcsb.utils.taxonomy >= 0.43
rcsb.utils.dictionary >= 1.24
rcsb.workflow >= 0.42
rcsb.utils.dictionary >= 1.26
rcsb.workflow >= 0.44
statistics; python_version < "3.0"

0 comments on commit 4ce0f46

Please sign in to comment.