-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathNLRtracker.sh
executable file
·163 lines (151 loc) · 4.7 KB
/
NLRtracker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/bin/bash
# log
if [ ! -d log ]; then
mkdir log
fi
LOG_DATE=`date '+%Y-%m-%d-%H:%M:%S'`
exec 1> >(tee -a log/${LOG_DATE}_out.log)
exec 2> >(tee -a log/${LOG_DATE}_err.log)
# directory name
dir_name=`dirname $0`
# help
function usage {
cat <<EOM
Usage: $(basename "$0") [OPTION]...
-h Display help
(required)
-s Filepath File path to amino acid(/nucleotide seqence) file (.fasta)
nucleotide seqence requires -t option.
-o String Directory name to save output
(optional)
-i Filepath Result of Interproscan (.gff3)
-f Filepath Result of FIMO (.gff)
-t String Seqtype of fasta file. dna/rna ("n") or protein ("p")
Default: "p"
-c Integer Number of CPUs for interproscan
Default: 2
-m Filepath meme.xml for use with FIMO
Default: module/meme.xml (from NLR Annotator)
-x Filepath hmm for use with HMMER
Default: module/abe3069_Data_S1.hmm (from Ma et al., 2020)
-d Filepath Description of Interproscan
Default: module/InterProScan 5.53-87.0.list
EOM
exit 2
}
# check options
echo -e "\n---------------------- input & option -----------------------";
while getopts ":s:i:f:t:c:m:x:d:o:h" optKey; do
case "$optKey" in
s)
if [ -f ${OPTARG} ]; then
echo "Fasta file = ${OPTARG}"
fasta=${OPTARG}
FLG_S=1
else
echo "${OPTARG} does not exits."
fi
;;
i)
if [ -f ${OPTARG} ]; then
echo "result of interproscan = ${OPTARG}"
interpro_result=${OPTARG}
FLG_I=1
else
echo "${OPTARG} does not exits. Run interproscan in this pipeline."
fi
;;
f)
if [ -f ${OPTARG} ]; then
echo "result of FIMO = ${OPTARG}"
FIMO_result=${OPTARG}
FLG_F=1
else
echo "${OPTARG} does not exits. Run FIMO in this pipeline."
fi
;;
t)
echo "Seqtype of fasta = ${OPTARG}"
Seqtype=${OPTARG}
;;
c)
echo "Number of CPUs = ${OPTARG}"
CPU=${OPTARG}
;;
m)
echo "xml for FIMO = ${OPTARG}"
XML=${OPTARG}
;;
x)
echo "hmm for HMMER = ${OPTARG}"
HMM=${OPTARG}
;;
d)
echo "Description of Interpro = ${OPTARG}"
Int_Desc=${OPTARG}
;;
o)
FLG_O=1
echo "output directory = ${OPTARG}"
outdir=${OPTARG}
;;
'-h'|'--help'|* )
usage
;;
esac
done
echo -e "\n---------------------- input & option -----------------------";
# check fasta file
if [ -z $FLG_S ]; then
echo -e "$(basename $0) : -s option is required\n"
usage
exit 1
fi
# check header
if [ -z $FLG_O ]; then
echo -e "$(basename $0) : -o option is required\n"
usage
exit 1
fi
# Main pipeline
mkdir $outdir
cat $fasta | awk '{if ($1 ~ /^>/) print "\n"$1; else printf $1}' | sed -e '1d' > ${outdir}/tmp.fasta
fasta=${outdir}/tmp.fasta
# 1. Interproscan
if [ -z $FLG_I ]; then
echo -e "\nRun Interproscan"
interproscan.sh -version
echo -e "\ninterproscan.sh -i $fasta -f gff3 -t ${Seqtype:-p} -o ${outdir}/interpro_result.gff -cpu ${CPU:-2} -appl Pfam,Gene3D,SUPERFAMILY,PRINTS,SMART,CDD,ProSiteProfiles"
interproscan.sh -i $fasta -f gff3 -t ${Seqtype:-"p"} -o "${outdir}/interpro_result.gff" -cpu ${CPU:-2} -appl Pfam,Gene3D,SUPERFAMILY,PRINTS,SMART,CDD,ProSiteProfiles -dp
interpro_result="${outdir}/interpro_result.gff"
else
echo -e "\nPass Interproscan (Use $interpro_result as output of Interproscan)"
fi
# 2. FIMO
if [ -z $FLG_F ]; then
echo -e "\nRun FIMO"
echo -e "\nfimo -o ${outdir}/fimo_out ${XML:-${dir_name}/module/meme.xml} $fasta"
fimo -o "${outdir}/fimo_out" ${XML:-"${dir_name}/module/meme.xml"} $fasta
FIMO_result="${outdir}/fimo_out/fimo.gff"
else
echo -e "\nPass FIMO (Use $FIMO_result as output of FIMO)"
fi
# 3. HMMER hmmsearch
if [ ${Seqtype:-"p"} = $"p" ]; then
echo -e "\nRun HMMER"
echo -e "\nhmmsearch --domtblout ${outdir}/CJID.txt ${HMM:-"${dir_name}/module/abe3069_Data_S1.hmm"} $fasta"
hmmsearch --domtblout "${outdir}/CJID.txt" ${HMM:-"${dir_name}/module/abe3069_Data_S1.hmm"} $fasta
hmmer_result="${outdir}/CJID.txt"
else
echo -e "hmmsearch not executed"
fi
# 4. NLR_extractor.R
if [ -f $interpro_result -a -f $FIMO_result ]; then
echo -e "\nRun NLRtracker"
Rscript ${dir_name}/module/NLRtracker.R ${Int_Desc:-"${dir_name}/module/InterProScan 5.53-87.0.list"} $interpro_result $FIMO_result ${fasta} $outdir ${Seqtype:-"p"} $hmmer_result $"${dir_name}/module/iTOL_NLR_template.txt"
echo -e "\nFinish NLRtracker!"
rm -rf ${outdir}tmp.fasta
else
echo -e "\nInterproscan output or FIMO output don't exist."
exit 1
fi