-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathReproduce.py
executable file
·121 lines (99 loc) · 3.81 KB
/
Reproduce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
import getopt as getopt
import os
import sys
threads = 48
## Pass arguments
argv = sys.argv[1:]
if(len(argv)==0):
print("help: Reproduce.py -h")
sys.exit(2)
try:
opts, args = getopt.getopt(argv, 't:')
except:
print("help: Reproduce.py -h")
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print("help: Reproduce.py -h")
sys.exit()
elif opt in ("-t"):
threads = int(arg)
print("Threads : " + str(threads))
# Download data and executables
# Each entry is handed to os.system and therefore runs in its own shell: a
# "cd" only lasts for the command line it appears in, so no "cd .." is
# needed between steps.
_TOOL_SETUP_COMMANDS = (
    # Start from an empty bin folder (rm -rf does nothing if it is absent).
    "rm -rf bin",
    "mkdir bin",
    # MHC.agc archive and the agc binary
    "wget -O MHC.agc https://zenodo.org/records/6617246/files/MHC-61.agc?download=1",
    "wget https://github.com/refresh-bio/agc/releases/download/v3.0/agc-3.0_x64-linux.tar.gz",
    "tar -xvf agc-3.0_x64-linux.tar.gz",
    "mv agc-3.0_x64-linux/agc bin/",
    "rm -rf agc-3.0_x64-linux*",
    # Get vg and minigraph and put in bin folder
    "wget https://github.com/vgteam/vg/releases/download/v1.52.0/vg",
    "chmod +x vg",
    "mv vg bin/",
    "git clone https://github.com/lh3/minigraph",
    "cd minigraph && make -j",
    "mv minigraph/minigraph bin/",
    "rm -rf minigraph",
    # Get mash
    "wget https://github.com/marbl/Mash/releases/download/v2.3/mash-Linux64-v2.3.tar",
    "tar -xvf mash-Linux64-v2.3.tar",
    "mv mash-Linux64-v2.3/mash bin/",
    "rm -rf mash-Linux64-v2.3*",
    # Get minichain: first the default checkout, then the v1.0 tag, which is
    # stored next to it as bin/minichain_10.
    "git clone https://github.com/at-cg/minichain",
    "cd minichain && make -j",
    "mv minichain/minichain bin/",
    "cd minichain && git checkout v1.0 && make -j",
    "mv minichain/minichain bin/minichain_10",
    "rm -rf minichain",
    "chmod +x bin/*",
)

for _setup_cmd in _TOOL_SETUP_COMMANDS:
    _setup_status = os.system(_setup_cmd)
    if _setup_status != 0:
        # Keep going as before, but make the failing step visible instead of
        # letting a later step fail for a non-obvious reason.
        print(
            "warning: command exited with status {}: {}".format(
                _setup_status, _setup_cmd
            ),
            file=sys.stderr,
        )
# Remove a Genomes folder left over from an earlier run, then rebuild it.
if os.path.exists("Genomes"):
    os.system("rm -rf Genomes")

for _genome_cmd in (
    "mkdir Genomes",
    # Extract the genomes from MHC.agc
    "bin/agc getcol -o Genomes/ MHC.agc",
    # The archive is not needed once its contents are extracted.
    "rm MHC.agc",
    # MHC-00GRCh38.fa is dropped from the extracted set.
    "rm Genomes/MHC-00GRCh38.fa",
):
    os.system(_genome_cmd)
# (Re)create the conda environment named MC and install the Python packages
# numpy, scipy, matplotlib, networkx, biopython, seaborn, pandas, rich and
# pylatexenc from conda-forge. No existence check is made: --force replaces
# an environment of the same name.
# os.system hands the command to /bin/sh, which is not bash everywhere (e.g.
# dash has no "source" builtin), so the command line is run explicitly under
# bash.
os.system(
    "bash -c 'source ~/.bashrc && conda create --force -n MC -y"
    " && conda activate MC"
    " && conda install -c conda-forge -y numpy scipy matplotlib networkx"
    " biopython seaborn pandas rich pylatexenc'"
)
map_threads = 6

# The two generation steps below are disabled; the commands are kept for
# reference.
# Generate the graph
# os.system("python3 Gen_Graph.py -t " + str(threads))
# Simulate queries
# os.system("source ~/.bashrc && conda activate MC && python3 Simulate_query.py -t " + str(threads))

_QUERY_SETS = ("Query_0.1", "Query_1", "Query_5")

# Output folders for the downloaded graphs and reads.
for _folder in ("Graphs", "Reads"):
    os.system("mkdir -p " + _folder)

# Fetch the record archive and sort its contents into the folders above.
os.system("curl https://zenodo.org/api/records/10665350/files-archive -o Data.zip")
os.system("unzip Data.zip")
os.system("mv *.gfa.gz Graphs/")
os.system("mv *.fq.gz Reads/")

# Replace any existing query folders with the contents of the tarballs.
for _query in _QUERY_SETS:
    os.system("rm -rf " + _query)
for _query in _QUERY_SETS:
    os.system("tar -xvf " + _query + ".tar.gz")
for _query in _QUERY_SETS:
    os.system("cd " + _query + " && gunzip *")
for _query in _QUERY_SETS:
    os.system("rm " + _query + ".tar.gz")

os.system("echo $(pwd)")
# Pipeline stages, run in order inside the MC conda environment.
_PIPELINE_STAGES = (
    # Map the queries
    "python3 Map_Graph.py -t " + str(threads),
    # Map the reads
    "python3 Map_Reads.py -t " + str(map_threads),
    # Plot the results
    "python3 Plot.py",
    # Plot the results for mapping
    "python3 Plot_Map.py",
)

for _stage in _PIPELINE_STAGES:
    # os.system hands the command to /bin/sh, which is not bash everywhere
    # (e.g. dash has no "source" builtin), so each stage is run explicitly
    # under bash.
    os.system(
        "bash -c 'source ~/.bashrc && conda activate MC && " + _stage + "'"
    )