-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenewise.cwl
307 lines (264 loc) · 8.51 KB
/
genewise.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
#!/usr/bin/env cwl-runner
# Copyright (C) 2019 - 2024 EMBL - European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
cwlVersion: v1.0
class: CommandLineTool

# Tool identity.
id: genewise
label: Genewise

# Interpreter that runs the client script supplied by the `command` input.
baseCommand: python
inputs:
  # Web Service Clients: Common Entries

  # The client script itself. Position 0 and no prefix place it directly
  # after the baseCommand (`python`) on the command line.
  command:
    type: File
    inputBinding:
      position: 0
    default:
      class: File
      location: ../../webservice-clients/python/genewise.py
  email:
    type: string?
    doc: "E-mail address"
    inputBinding:
      prefix: --email
      position: 2
  title:
    type: string?
    doc: "Job title"
    inputBinding:
      prefix: --title
      position: 3
  jobid:
    type: string?
    doc: "Job identifier"
    inputBinding:
      prefix: --jobid
      position: 1
  polljob:
    type: boolean?
    doc: "Get job result"
    inputBinding:
      prefix: --polljob
      position: 2
  outfile:
    type: string?
    doc: "File name for results"
    inputBinding:
      prefix: --outfile
      position: 4
  outformat:
    type: string?
    doc: "Output format for results"
    inputBinding:
      prefix: --outformat
      position: 5
  # Input id is lower-case, but the client flag is camel-case (--pollFreq).
  pollfreq:
    type: int?
    doc: "Poll frequency in seconds (default 3s)"
    inputBinding:
      prefix: --pollFreq
      position: 6
  params:
    type: boolean?
    doc: "List input parameters"
    inputBinding:
      prefix: --params
      position: 1
  # Input id is plural-less "paramdetails"; the client flag is --paramDetail.
  paramdetails:
    type: string?
    doc: "Get details for parameter"
    inputBinding:
      prefix: --paramDetail
      position: 1
  resultTypes:
    type: string?
    doc: "Get result types"
    inputBinding:
      prefix: --resultTypes
      position: 1
  asyncjob:
    type: boolean?
    doc: "Asynchronous mode"
    inputBinding:
      prefix: --asyncjob
      position: 2
  status:
    type: boolean?
    doc: "Get job status"
    inputBinding:
      prefix: --status
      position: 2
  version:
    type: boolean?
    doc: "Prints out the version of the Client and exit"
    inputBinding:
      prefix: --version
      position: 1
  baseUrl:
    type: string?
    doc: "Base URL for service"
    inputBinding:
      prefix: --baseUrl
      position: 7

  # Web Service Clients: Different Entries

  asequence:
    type: string?
    label: "Input sequence A"
    doc: "Sequence filename or ID"
    inputBinding:
      prefix: --asequence
      position: 8
  bsequence:
    type: string?
    label: "Input sequence B"
    doc: "Sequence filename or ID"
    inputBinding:
      prefix: --bsequence
      position: 9

  # The display switches below are declared as strings (not booleans) and
  # default to the literal text "true", which is passed as the flag's value.
  para:
    type: string?
    label: Show parameters in alignment
    doc: "Show parameters in the output alignment, as in genewise."
    inputBinding:
      prefix: --para
      position: 10
    default: "true"
  pretty:
    type: string?
    label: Pretty ASCII view
    doc: "Show pretty ASCII alignment viewing, as in genewise."
    inputBinding:
      prefix: --pretty
      position: 11
    default: "true"
  genes:
    type: string?
    label: Gene Structure
    doc: "Show gene structure, as in genewise"
    inputBinding:
      prefix: --genes
      position: 12
    default: "true"
  trans:
    type: string?
    label: Protein Translation
    doc: "Show protein translation, breaking at frameshifts."
    inputBinding:
      prefix: --trans
      position: 13
    default: "true"
  cdna:
    type: string?
    label: cDNA
    doc: "Show cDNA, as in genewise."
    inputBinding:
      prefix: --cdna
      position: 14
    default: "true"
  embl:
    type: string?
    label: EMBL Feature.
    doc: "EMBL feature table format with CDS key."
    inputBinding:
      prefix: --embl
      position: 15
    default: "true"
  ace:
    type: string?
    label: Ace file gene structure
    doc: "Show Ace file gene structure, as in genewise."
    inputBinding:
      prefix: --ace
      position: 16
    default: "true"
  gff:
    type: string?
    label: GFF output
    doc: "Show Gene Feature Format file, as in genewise."
    inputBinding:
      prefix: --gff
      position: 17
    default: "true"
  diana:
    type: string?
    label: EMBL Feature for diana
    doc: "Show EMBL FT format suitable for diana."
    inputBinding:
      prefix: --diana
      position: 18
    default: "true"

  # Model/algorithm selectors. Long descriptions use folded block scalars
  # (>-): wrapped lines join with a single space, blank lines separate
  # paragraphs.
  init:
    type: string?
    label: Local/Global mode
    doc: >-
      Model in local/global mode. You should only put the model in global
      mode if you expect your protein homolog to have homology from start to
      end to the gene in the DNA sequence.
    inputBinding:
      prefix: --init
      position: 19
    default: "local"
  splice:
    type: string?
    label: Splice site
    doc: >-
      Using splice model or GT/AG? Use the full blown model for splice sites,
      or a simplistic GT/AG. Generally if you are using a DNA sequence which
      is from human or worm, then leave this on. If you are using a very
      different (eg plant) species, switch it off.
    inputBinding:
      prefix: --splice
      position: 20
    default: "flat"
  random:
    type: string?
    label: Random (Null) Model
    doc: >-
      The probability of the model has to be compared to an alternative model
      (in fact to all alternative models which are possible) to allow proper
      Bayesian inference. This causes considerable difficulty in these
      algorithms because from an algorithmic point of view we would probably
      like to use an alternative model which is a single state, like the
      random model in profile-HMMs, where we can simply 'log-odd' the scored
      model, whereas from a biological point of view we probably want to use
      a full gene predicting alternative model.

      In addition we need to account for the fact that the protein HMM or
      protein homolog probably does not extend over all the gene sequence,
      nor in fact does the gene have to be the only gene in the DNA sequence.
      This means that very good splice sites/poly-pyrimidine tracts outside
      of the 'matched' alignment can severely de-rail the alignment.
    inputBinding:
      prefix: --random
      position: 21
    default: "syn"
  alg:
    type: string?
    label: Algorithm
    doc: >-
      The solution is different in the genewise 21:93 compared to the
      genewise 6:23 algorithms.

      (1) In 6:23 we force the external match portions of the homology model
      to be identical to the alternative model, thus cancelling each other
      out. This is a pretty gross approximation and is sort of equivalent to
      the intron tie'ing. It makes things algorithmically easier... However
      this means a) 6:23 is nowhere near a probabilistic model and b) you
      really have to use a tied intron model in 6:23 otherwise very bad edge
      effects (final introns being ridiculously long) occur.

      (2) In 21:93 we have a full probabilistic model on each side of the
      homology segment. This is not reported in the -pretty output but you
      can see it in the -alb output if you like. Do not trust the gene model
      outside of the homology segment however. By having these external gene
      model parts we can use all the gene model features safe in the
      knowledge that if the homology segments do not justify the match then
      the external part of the model will soak up the additional
      intron/py-tract/splice site biases.
    inputBinding:
      prefix: --alg
      position: 22
    # Quoted so the value stays the string "623" rather than an integer.
    default: "623"
outputs:
  # Single catch-all output: an array of files gathered by the `*` glob.
  all:
    outputBinding:
      glob: "*"
    streamable: true
    type: File[]
$schemas:
  - https://schema.org/version/latest/schemaorg-current-https.rdf

# The `s:` prefix must use the https form of the schema.org namespace so
# that the `s:*` annotations below resolve against the https RDF file
# listed in $schemas.
$namespaces:
  s: https://schema.org/
  edam: http://edamontology.org/

# Authorship: person -> employing organisation -> department.
s:author:
  - class: s:Person
    s:identifier: https://orcid.org/0000-0001-8728-9449
    # NOTE(review): this address looks redacted in this copy of the file;
    # confirm and restore the real mailto: value.
    s:email: mailto:[email protected]
    s:name: Fábio Madeira (Web Production)
    s:worksFor:
      - class: s:Organization
        s:name: EMBL - European Bioinformatics Institute
        s:location: Hinxton, Cambridgeshire, CB10 1SD, UK
        s:department:
          - class: s:Organization
            s:name: Web Production

# s:citation: https://dx.doi.org/10.6084/m9.figshare.3115156.v2
# s:codeRepository: https://github.com/common-workflow-language/common-workflow-language
s:dateCreated: "2018-08-03"

s:license:
  - https://www.apache.org/licenses/LICENSE-2.0
  - https://spdx.org/licenses/Apache-2.0

s:copyrightHolder: "EMBL - European Bioinformatics Institute"