-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathformose_reaction.py
executable file
·62 lines (45 loc) · 2.08 KB
/
formose_reaction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Driver script for the formose reaction network.
# NOTE(review): names such as include, postChapter, smiles, DG, inputGraphs
# and write_gen_output are not defined in this file; they presumably come from
# the MØD runtime and the include()d scripts below -- confirm before running
# this outside of that environment.

# This flag will be used to toggle one pair of extra Cannizzaro 2 rules.
# It is assigned before the include() calls, presumably so that the included
# rule definitions can read it -- TODO confirm in ../main.py.
with_formaldehyde = True
include("../main.py")
include("../mod_to_neo4j_exporter.py")
postChapter("Formose Reaction")
# Starting molecules, given as SMILES strings with human-readable names.
# NOTE(review): the substructure queries at the end of this script pass
# add=False, which suggests these default calls also register the graphs in
# inputGraphs -- verify against the MØD documentation.
formaldehyde = smiles("C=O", name="Formaldehyde")
glycoladehyde = smiles("OCC=O", name="Glycolaldehyde")
water = smiles("O", name="Water")
# The bare string literal below is never executed; it keeps around the
# alternative of loading a previously dumped derivation graph instead of
# generating one from scratch.
'''dg = DG.load(inputGraphs, inputRules, "formose_6rounds_dec21.dg")
print("Finished loading from dump file")'''
# Number of generations we want to perform
generations = 3
# Derivation graph built over inputGraphs, using term labels with the
# specialisation relation.
dg = DG(graphDatabase=inputGraphs,
labelSettings=LabelSettings(LabelType.Term, LabelRelation.Specialisation))
# Initial subset/universe fed to the first expansion round.
subset = inputGraphs
universe = []
# dump initial reactants as part of "G0"
write_gen_output(subset, generation=0, reaction_name="formose")
# Expand the reaction network one generation at a time. Every round feeds the
# subset/universe produced by the previous round back into the strategy
# `strat` (not defined in this file; presumably provided by ../main.py).
with dg.build() as b:
    for gen in range(generations):
        round_number = gen + 1
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        print(f"Starting round {round_number}")
        res = b.execute(addSubset(subset) >> addUniverse(universe) >> strat, verbosity=8)
        elapsed = time.perf_counter() - start_time
        print(f"Took {elapsed} seconds to complete round {round_number}")
        print(f'Products in generation {round_number}:', len(res.subset))
        # Tautomer clean-up is currently disabled: the raw result of the round
        # is carried over as-is, so subset/universe may still contain
        # redundant tautomers. To re-enable it, use the clean_taut() line
        # instead of the plain assignment and re-run the builder with the
        # cleaned sets (the two commented-out statements below).
        #subset, universe = clean_taut(dg, res, algorithm="CMI")
        subset, universe = res.subset, res.universe
        #print('Product set size after removal:', len(subset))
        # This step replaces the previous subset (containing tautomers) with the cleaned subset
        #res = b.execute(addSubset(subset) >> addUniverse(universe))
        #export_to_neo4j(dg_obj = dg, generation_num = gen)
        # Generation 0 holds the initial reactants, so round k is written
        # out as generation k.
        write_gen_output(subset, round_number, reaction_name="formose")
print('Completed')

# Dump the derivation graph so that a later run can load it quickly instead
# of regenerating the whole network from scratch.
dump_path = dg.dump()
print("Dump file: ", dump_path)

# Optional analyses, currently switched off.
#check_sdf_matches(dg, "../../data/FormoseTestSetPlus5.sdf")
#count_rules_by_gen(dg, 'formose_output.txt')

# Substructure patterns used only as queries; add=False is passed so they are
# created without being added alongside the starting molecules
# (NOTE(review): confirm the exact meaning of add=False in the MØD docs).
enol = smiles("[C]=[C]O", name="enol substruct", add=False)
diol = smiles("O[C]O", name="diol substruct", add=False)

print_reaction(dg, 'Aldol')
# Report, for each pattern in turn, what produces it in the network.
for pattern in (enol, diol):
    find_substruct_producer(dg, pattern, print_rule=True)