-
Notifications
You must be signed in to change notification settings - Fork 0
/
qstat
executable file
·392 lines (338 loc) · 11.4 KB
/
qstat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#!/bin/bash
usage() {
    # Print the command-line help on stdout, then terminate the script.
    # The exit status is that of the cat below (0 when the text was written).
    # The delimiter is quoted because the text contains nothing to expand.
    cat << 'EOF'
Usage: qstat [OPTIONS] [JOBID1 JOBID2...|DESTINATION] [@SERVER]
This command provides a lightweight alternative to qstat. Data
are queried and updated every minute from the PBS job scheduler.
Options not listed here will be forwarded to the scheduler.
Please use those options sparingly.
Job IDs, if provided, should be numeric only and space delimited.
If a destination is provided, it should be a valid execution
queue on the chosen server. This cached version of qstat does not
allow mixed queries from multiple servers - only one server may
be specified per request.
Options:
-h, --help display this help and exit
-f display full output for a job
-H job output regardless of state or all finished jobs
-l disable labels (no header)
-n display a list of nodes at the end of the line
-s display administrator comment on the next line
--status filter jobs by specific single-character status code
-u filter jobs by the submitting user
-w use wide format output (120 columns)
-x include recently finished jobs in output
EOF
    exit $?
}
# Function to load a server config (or site config if only one server)
#
# Globals read:    QSCACHE_CONFIG, QSCACHE_SERVER, MYPATH, MAXAGE, plus
#                  DATAPATH, QSTATBIN and AGEDELAY as left by the sourced file
# Globals written: QSCACHE_SERVER (defaults to "site"), QSTATBIN,
#                  QSCACHE_BYPASS, BYPASS_REASON, BYPASS_DELAY, CACHETIME,
#                  CACHEAGE, QSCACHE_RECURSION (exported when no config exists),
#                  and anything the sourced .cfg file assigns
# Exits:           1 when no qstat binary can be determined
function load_config {
    # Backward compatibility
    if [[ -n $QSCACHE_CONFIG ]]; then
        QSCACHE_SERVER=${QSCACHE_CONFIG%.*}
    fi

    if [[ ! -e $MYPATH/${QSCACHE_SERVER:=site}.cfg ]]; then
        echo -e "No site config found for qstat-cache. Bypassing cache ...\n" >&2

        # Try to find PBS qstat: take the LAST qstat found on PATH. The bash
        # builtin "type -aP" lists every PATH match and, unlike
        # "which --all", does not depend on GNU which being installed.
        QSTATBIN=$(type -aP qstat | tail -n1)

        # Exported so that, if the binary found is this script again, the
        # recursion guard at the top of the script stops the child.
        export QSCACHE_RECURSION=true

        if [[ -n $QSTATBIN ]]; then
            QSCACHE_BYPASS=true
            BYPASS_REASON=nocfg
        else
            echo "Fatal: PBS qstat location could not be determined" >&2
            exit 1
        fi
        return
    fi

    # Quoted so that an install path containing whitespace still loads
    source "$MYPATH/${QSCACHE_SERVER}.cfg"

    if [[ ! -d $DATAPATH ]]; then
        echo -e "Specified data path does not exist. Bypassing cache ...\n" >&2
        QSCACHE_BYPASS=true
        BYPASS_REASON=nodata
    else
        # If cache is missing or older than MAXAGE seconds, warn and bypass.
        # CMD is single-quoted on purpose: query_cache eval's it, so the
        # path is expanded (and kept as one word) at eval time.
        CMD='grep --color=never "." "$DATAPATH/updated"' query_cache
        CACHETIME=$DATA

        if [[ $CACHETIME =~ ^[0-9]+$ ]]; then
            # 10# forces base ten even if the stamp has leading zeros
            CACHEAGE=$(( $(date +%s) - 10#$CACHETIME ))
        else
            # Unparseable stamp: age is unknown. As before, this does not by
            # itself force a bypass (an empty age used to compare as 0).
            CACHEAGE=0
        fi

        if [[ $CACHEAGE -ge $MAXAGE ]]; then
            echo -e "Cached data is more than five minutes old. Bypassing cache ...\n" >&2
            QSCACHE_BYPASS=true
            BYPASS_REASON=olddata
            # Fall back to no delay when the config does not define AGEDELAY,
            # so a later "sleep $BYPASS_DELAY" never runs without an operand
            BYPASS_DELAY=${AGEDELAY:-0}
        fi
    fi

    if [[ -z $QSTATBIN ]]; then
        echo "Fatal: qstat location not specified" >&2
        exit 1
    fi
}
# Function to run a command on cache file, ensuring that cached file exists
#
# Evaluates the pipeline held in CMD once per second until it succeeds and
# leaves its combined stdout/stderr in DATA. Success is judged by the exit
# status of the FIRST stage of the last pipeline in CMD (PIPESTATUS[0]), so a
# failing reader is detected even when a later filter stage exits 0.
#
# Globals read:    CMD, MAXWAIT, QSCACHE_DEBUG, QSTATBIN, ARGLIST
# Globals written: DATA
# Exits:           with the real qstat's status once the script has been
#                  running for more than MAXWAIT seconds without success
function query_cache {
    until DATA=$(eval "$CMD; exit \${PIPESTATUS[0]}" 2>&1); do
        if [[ $SECONDS -gt $MAXWAIT ]]; then
            echo -e "Cache data search timeout. Bypassing cache ...\n" >&2
            log_usage no "reason=maxwait"
            # Unquoted on purpose: ARGLIST is the flattened argument string
            $QSTATBIN $ARGLIST
            exit $?
        fi

        # Compare against upper-case TRUE: the value is upper-cased first, so
        # the previous comparison with lower-case "true" could never match.
        if [[ ${QSCACHE_DEBUG^^} == TRUE ]]; then
            echo -e "\nError at $(date):"
            echo "$DATA"
        fi

        sleep 1
    done
}
# Function to log usage (accepts argument for whether cached call or not)
#
# Appends one line to $LOGPATH/<user>-<YYYYMMDD>.log; does nothing when
# LOGPATH is not an existing directory.
#
# Arguments: $1 - "yes" or "no", recorded as cache=$1
#            $2 - optional free-form detail (callers pass "reason=...")
# Globals read: LOGPATH, HN, ARGLIST
function log_usage {
    [[ -d $LOGPATH ]] || return 0

    local stamp logfile
    stamp=$(date '+%H.%M:%S')
    logfile=$(whoami)-$(date +%Y%m%d).log

    # Every field is quoted so that an empty value (e.g. HN) keeps its column
    # instead of shifting the later ones, and so that a LOGPATH containing
    # whitespace does not turn the redirection into an "ambiguous redirect".
    printf "%-10s %-20s %-10s %-10s %-15s %s\n" \
        "$stamp" "$HN" "$PPID" "cache=$1" "$2" "$ARGLIST" >> "$LOGPATH/$logfile"
}
# Function to decode and load settings for other supported servers
#
# Resolves CUSTOMSERVER to a name that has a matching <name>.cfg next to the
# script and stores it in QSCACHE_SERVER. When no such file exists, the
# default config is loaded and SERVERMAP (whitespace-separated alias=name
# pairs) is scanned for a translation. Exits 1 if the name stays unknown.
function find_server {
    if ! [[ -f $MYPATH/${CUSTOMSERVER}.cfg ]]; then
        load_config

        # Every pair is examined in order; a match rewrites CUSTOMSERVER and
        # the scan carries on with the new value.
        for PBSSERVER in $SERVERMAP; do
            case $PBSSERVER in
                $CUSTOMSERVER=*)
                    CUSTOMSERVER=${PBSSERVER#*=}
                    ;;
            esac
        done
    fi

    [[ -f $MYPATH/${CUSTOMSERVER}.cfg ]] || {
        echo "Error: server $CUSTOMSERVER not recognized" >&2
        exit 1
    }

    QSCACHE_SERVER=$CUSTOMSERVER
}
# Function to bypass caching if args are not understood
#
# Loads the configuration, then hands the complete original argument string
# to the real qstat and exits with its status. Never returns.
#
# Arguments: $1 - the option that triggered the bypass (used in the error
#                 message only)
function arg_bypass {
    load_config

    [[ -f $QSTATBIN ]] || {
        echo "Error: option $1 not supported on $HN" >&2
        exit 1
    }

    # One-second pause before every pass-through call
    sleep 1
    log_usage no "reason=args"

    # Unquoted on purpose: ARGLIST is the flattened argument string
    $QSTATBIN $ARGLIST
    exit $?
}
# Avoid infinite recursion: load_config exports QSCACHE_RECURSION=true before
# falling back to a qstat found on PATH, so if that turns out to be this
# script again the child stops here.
if [[ $QSCACHE_RECURSION == true ]]; then
    echo "Fatal: PBS qstat location could not be determined" >&2
    exit 1
fi

# Defaults for site-config (make edits in that file)
SECONDS=0       # restart bash's elapsed-time counter; compared with MAXWAIT
DATAPATH=
LOGPATH=
MAXWAIT=60
MAXAGE=300
QSTATBIN=

# Internal variables
# Directory holding this script and its .cfg files. The substitutions are
# quoted so that an install path containing whitespace resolves correctly.
MYPATH=$(dirname "$(readlink -f "$0")")
JOBLIST= USERSTR= NOLABELS= CUSTOMSERVER=
ARGLIST="$*"                    # flattened arguments, for logging/pass-through
DATAFILE=joblist-default.dat    # cache file name; rewritten by the options
COPER="!~"                      # awk operator applied to the state column
AWKOPT=print                    # awk action run for each matching line
STATCODE="[FM]"                 # state pattern used together with COPER
HN=$(hostname)
BYPASS_DELAY=0

# Check for bypass setting
if [[ ${QSCACHE_BYPASS^^} == TRUE ]]; then
    load_config
    log_usage no "reason=manual"
    # The positional parameters are still untouched at this point, so pass
    # them through verbatim; expanding the flattened $ARGLIST would re-split
    # quoted arguments and glob-expand them against the current directory.
    $QSTATBIN "$@"
    exit $?
fi
# Parse command line arguments
#
# State set only by the parser starts out empty so that same-named variables
# inherited from the caller's environment cannot change the result.
DESTLIST= FULLMODE= HMODE=

# Walks the given arguments and records the request in globals:
#   DATAFILE  - cache file name, rewritten by -f/-H/-n/-s/-u/-w
#   FULLMODE, HMODE, NOLABELS - set to "true" by -f, -H and -l
#   AWKOPT    - awk action (changed by -s to print two lines per job)
#   USERSTR   - value given with -u
#   STATCODE, COPER - state filter (--status CODE, -x)
#   JOBLIST, DESTLIST - "|"-joined job numbers / queue names
# Server parts (@srv, id.srv, queue@srv) are handed to find_server. Anything
# not understood goes to arg_bypass, which does not return.
function parse_args {
    local flags

    while [[ $# -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage
                ;;
            --status)
                # Must be tested before the generic "-*" arm below, which
                # would otherwise swallow it and bypass the cache.
                STATCODE="$2"
                COPER="~"
                shift
                ;;
            --*)
                # No other long option is handled by the cache
                arg_bypass "$1"
                ;;
            -*)
                # Bundle of single-letter flags, consumed left to right
                flags=${1:1}
                while [[ -n $flags ]]; do
                    case ${flags:0:1} in
                        a)
                            # Pass through since -a is assumed with default output
                            ;;
                        f)
                            FULLMODE=true
                            DATAFILE=${DATAFILE/default/full}
                            ;;
                        H)
                            HMODE=true
                            DATAFILE=${DATAFILE/default/info}
                            ;;
                        l)
                            NOLABELS=true
                            ;;
                        n)
                            DATAFILE=${DATAFILE/./-nodes.}
                            DATAFILE=${DATAFILE/default/info}
                            ;;
                        s)
                            AWKOPT="for (i=1; i<=2; i++) {print; getline}"
                            DATAFILE=${DATAFILE/job/comm}
                            DATAFILE=${DATAFILE/default/info}
                            ;;
                        u)
                            DATAFILE=${DATAFILE/default/info}
                            if [[ $flags != u ]]; then
                                # Attached form: -uNAME
                                USERSTR="${flags:1}"
                            else
                                # Separate form: -u NAME
                                USERSTR="$2"
                                shift
                            fi
                            # The rest of the bundle was the user name
                            flags=
                            ;;
                        w)
                            DATAFILE=${DATAFILE/default/wide}
                            DATAFILE=${DATAFILE/info/wide}
                            ;;
                        x)
                            STATCODE=
                            ;;
                        *)
                            # Name the offending flag for the error message
                            arg_bypass "-${flags:0:1}"
                            ;;
                    esac
                    flags=${flags:1}
                done
                ;;
            @*)
                CUSTOMSERVER=${1:1} find_server
                ;;
            [0-9]*)
                # Job ID, optionally followed by .server
                if [[ $1 == *.* ]]; then
                    CUSTOMSERVER=${1#*.} find_server
                fi
                if [[ -z "$JOBLIST" ]]; then
                    JOBLIST="${1%%.*}"
                else
                    JOBLIST="$JOBLIST|${1%%.*}"
                fi
                ;;
            [a-zA-Z]*)
                # Destination queue, optionally followed by @server
                if [[ $1 == *@* ]]; then
                    CUSTOMSERVER=${1#*@} find_server
                fi
                if [[ -z "$DESTLIST" ]]; then
                    DESTLIST="${1%%@*}"
                else
                    DESTLIST="$DESTLIST|${1%%@*}"
                fi
                ;;
            *)
                arg_bypass "$1"
                ;;
        esac
        shift
    done
}

parse_args "$@"
# Historical full data requires bypass: full output (-f) combined with either
# no state filter (-x clears STATCODE) or the state F is never served from
# the cache.
if [[ $FULLMODE == true ]] && [[ -z $STATCODE || $STATCODE == F ]]; then
    QSCACHE_BYPASS=true
    BYPASS_REASON="args"
fi

# Load chosen server config
load_config

# Hand the request to the real qstat when a bypass was requested above or
# decided by load_config (missing config, missing data path, stale cache).
if [[ $QSCACHE_BYPASS == true ]]; then
    log_usage no "reason=$BYPASS_REASON"
    # load_config copies AGEDELAY into BYPASS_DELAY for stale caches, and this
    # script sets no default for AGEDELAY; fall back to 0 so that sleep is
    # never called without an operand.
    sleep "${BYPASS_DELAY:-0}"
    # Unquoted on purpose: ARGLIST is the flattened argument string
    $QSTATBIN $ARGLIST
    exit $?
fi
# Escape the literal "[]" of array job IDs in JOBLIST so that awk does not
# read it as the start of a bracket expression. Every occurrence is escaped;
# a second call leaves already-escaped text alone.
function escape_joblist {
    JOBLIST=${JOBLIST//\[\]/\\\[\\\]}
}

# Build in CMD the awk pipeline for full (-f) output. Records in the full
# cache file are separated by blank lines, hence RS=''.
# Globals read: JOBLIST, DESTLIST, COPER, STATCODE, DATAPATH, DATAFILE
function build_full_query {
    if [[ -n $JOBLIST ]]; then
        escape_joblist
        # The dot after the job number is escaped so that job 123 does not
        # also select 1234, 1235, ...
        CMD="awk -vRS='' '/Job Id: ($JOBLIST)\./{print \$0\"\n\"}' $DATAPATH/$DATAFILE"
    else
        # Without job IDs only the calling user's jobs are shown.
        # NOTE(review): this pattern is a prefix match on the owner name, and
        # the queue pattern below is unanchored too - confirm against the
        # cached qstat -f format before tightening either.
        CMD="awk -vRS='' '/Job_Owner = $(id -u -n)/{print \$0\"\n\"}' $DATAPATH/$DATAFILE"
        if [[ -n $DESTLIST ]]; then
            CMD="$CMD | awk -vRS='' '/queue = ($DESTLIST)/{print \$0\"\n\"}'"
        fi
    fi

    # Only an explicit --status request filters on state in full mode
    if [[ $COPER == "~" ]]; then
        CMD="$CMD | awk -vRS='' '/job_state = $STATCODE/{print \$0\"\n\"}'"
    fi
}

# Build in CMD the cat/awk pipeline for tabular output.
# Globals read:    DATAPATH, DATAFILE, HMODE, JOBLIST, DESTLIST, AWKOPT
# Globals updated: COPER, STATCODE, USERSTR, JOBLIST, UCOL, SCOL, DCOL
function build_table_query {
    CMD="cat $DATAPATH/$DATAFILE"

    # H-mode behavior depends on whether job is specified: with job IDs the
    # state filter is dropped, without them the filter is inverted so that
    # only jobs matching STATCODE are shown.
    if [[ $COPER == "!~" && $HMODE == true ]]; then
        if [[ -n $JOBLIST ]]; then
            STATCODE=
        else
            COPER="~"
        fi
    fi

    # Set column IDs (user, state and queue column of the chosen layout)
    if [[ $DATAFILE == *default* ]]; then
        UCOL=3 SCOL=5 DCOL=6
    else
        UCOL=2 SCOL=10 DCOL=3
    fi

    # Outside the wide layout a long user name is cut to its first seven
    # characters plus "*" before it is compared with the user column
    if [[ $DATAFILE != *wide* ]] && [[ ${#USERSTR} -gt 8 ]]; then
        >&2 echo "Warning: username is truncated in cached output; mismatched output possible"
        >&2 echo -e " use -w option for robust long username support\n"
        USERSTR="${USERSTR:0:7}*"
    fi

    # Attach various filters to output
    if [[ -n "$STATCODE" ]]; then
        CMD="$CMD | awk '\$$SCOL && \$$SCOL $COPER \"$STATCODE\" && \$1 ~ /^[0-9]/ { $AWKOPT }'"
    fi
    if [[ -n "$USERSTR" ]] && [[ ${USERSTR^^} != ALL ]]; then
        CMD="$CMD | awk '\$$UCOL == \"$USERSTR\" { $AWKOPT }'"
    fi
    if [[ -n "$JOBLIST" ]]; then
        escape_joblist
        CMD="$CMD | awk '\$1 ~ /^($JOBLIST)\./ { $AWKOPT }'"
    fi
    if [[ -n "$DESTLIST" ]]; then
        CMD="$CMD | awk '\$$DCOL ~ /^($DESTLIST)$/ { $AWKOPT }'"
    fi
}

# Print the column header that belongs to the layout named in DATAFILE
function print_labels {
    if [[ $DATAFILE == *default* ]]; then
        echo "Job id Name User Time Use S Queue"
        echo "---------------- ---------------- ---------------- -------- - -----"
    elif [[ $DATAFILE == *info* ]]; then
        echo " Req'd Req'd Elap"
        echo "Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time"
        echo "--------------- -------- -------- ---------- ------ --- --- ------ ----- - -----"
    else
        echo " Req'd Req'd Elap"
        echo "Job ID Username Queue Jobname SessID NDS TSK Memory Time S Time"
        echo "------------------------------ --------------- --------------- --------------- -------- ---- ----- ------ ----- - -----"
    fi
}

if [[ $FULLMODE == true ]]; then
    build_full_query
else
    build_table_query
fi

# Get requested output
query_cache

# RVAL becomes the script's exit status: 0 when something matched, else 1
if [[ -n $DATA ]]; then
    if [[ $FULLMODE != true ]] && [[ -z $NOLABELS ]]; then
        print_labels
    fi
    # printf emits the cached text verbatim; echo -e would interpret any
    # backslash sequence in it (a "\c" would cut the output short)
    printf '%s\n' "$DATA"
    RVAL=0
else
    RVAL=1
fi
# Print cache age in debug mode (CACHETIME is the epoch stamp read from the
# cache directory by load_config)
[[ ${QSCACHE_DEBUG^^} != TRUE ]] || echo -e "\nCached at: $(date -d @$CACHETIME)"

# Record the request as answered from the cache, then leave with RVAL
# (0 when output was produced, 1 when nothing matched)
log_usage yes
exit $RVAL