forked from CMSLQ/submitJobsWithCrabV2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmulticrab.py
executable file
·132 lines (114 loc) · 5.21 KB
/
multicrab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
import os
import subprocess
from optparse import OptionParser
#
# Use commands like ./multicrab.py -c status -d runBetaOneLQ1MC/testTag_2015Jul13_104935/
# This will check the status of the submitted crab jobs over multiple datasets.
from CRABAPI.RawCommand import crabCommand
def getOptions():
    """
    Parse and validate the arguments provided by the user.

    The arguments are read from the command line with ``optparse``.

    Returns:
        The ``optparse`` options object, with these attributes:
          crabCmd              -- crab command to run for each task (required)
          projDir              -- directory containing the tasks (required,
                                  must be an existing directory)
          crabCmdOptions       -- extra options for the crab command, as one
                                  string (default "")
          noAutoResubmit       -- True when -r was given (default False)
          ignoreMulticrabCache -- True when -i was given (default False)

    The program is terminated through ``parser.error`` when positional
    arguments are present, when -c or -d is missing, or when the directory
    given with -d does not exist.
    """
    usage = ('usage: %prog -c CMD -d DIR [-o OPT]\nThe multicrab command'
             ' executes "crab CMD OPT" for each task contained in DIR\nUse'
             ' multicrab -h for help')

    parser = OptionParser(usage=usage)

    parser.add_option("-c", "--crabCmd", dest="crabCmd",
                      help=("The crab command you want to execute for each "
                            "task in the DIR"),
                      metavar="CMD")

    parser.add_option("-d", "--projDir", dest="projDir",
                      help="The directory where the tasks are located",
                      metavar="DIR")

    parser.add_option("-o", "--crabCmdOptions", dest="crabCmdOptions",
                      help="The options you want to pass to the crab command CMD",
                      metavar="OPT", default="")

    # The two flags below take no value, so they carry no metavar.
    parser.add_option("-r", "--noAutoResubmit", dest="noAutoResubmit",
                      help="don't automatically run the resub commands",
                      default=False, action="store_true")

    parser.add_option("-i", "--ignoreCache", dest="ignoreMulticrabCache",
                      help=("don't use cache file to skip checking status of "
                            "jobs already done"),
                      default=False, action="store_true")

    (options, args) = parser.parse_args()

    if args:
        parser.error("Found positional argument(s) %s." % args)
    if not options.crabCmd:
        parser.error("(-c CMD, --crabCmd=CMD) option not provided")
    if not options.projDir:
        parser.error("(-d DIR, --projDir=DIR) option not provided")
    if not os.path.isdir(options.projDir):
        parser.error("Directory %s does not exist" % options.projDir)

    return options
def main():
    """
    Run one crab command for every task directory in the project directory.

    For each sub-directory of ``options.projDir`` (except the helper
    directories whose name contains 'workdir', 'cfgfiles' or 'output') the
    equivalent of ``crab CMD TASK OPT`` is executed through ``crabCommand``.

    When the command is 'status', the function additionally:
      * skips tasks listed in ``<projDir>/multicrab.cache`` (unless -i was
        given) and counts them as COMPLETED;
      * counts a task as FAILED when its result reports at least one failed
        job, otherwise uses the status reported by crab;
      * appends newly COMPLETED tasks to the cache file;
      * prints a summary and the resubmit commands for FAILED tasks, running
        them unless -r was given.

    All output uses single-argument ``print(...)`` calls so that the text is
    the same under Python 2 and Python 3.
    """
    options = getOptions()
    checkingStatus = options.crabCmd == 'status'

    # The cache path is always computed, so the -i option can no longer leave
    # it undefined when completed tasks are recorded further down.
    cacheFile = os.path.abspath(os.path.join(options.projDir, 'multicrab.cache'))
    alreadyCached = _readCompletedTasksFromCache(cacheFile)
    # -i only disables *skipping* cached tasks; the cache content is still
    # used to avoid writing duplicate lines.
    if options.ignoreMulticrabCache:
        completedTasksFromCache = set()
    else:
        completedTasksFromCache = set(alreadyCached)

    # Directory names (not full paths) containing one of these are not tasks.
    nonCrabDirMarkers = ('workdir', 'cfgfiles', 'output')

    tasksStatusDict = {}
    # Execute the command with its arguments for each task (sorted so that the
    # processing order does not depend on the file system).
    for entry in sorted(os.listdir(options.projDir)):
        task = os.path.join(options.projDir, entry)
        if not os.path.isdir(task):
            continue
        # Test only the entry name: a project path that itself contains e.g.
        # "output" must not cause every task to be skipped.
        if any(marker in entry for marker in nonCrabDirMarkers):
            continue
        if checkingStatus and task in completedTasksFromCache:
            print("Don't check status of task, was already completed: %s" % task)
            tasksStatusDict[task] = 'COMPLETED'
            continue

        print('')
        print("Executing (the equivalent of): crab %s %s %s" %
              (options.crabCmd, task, options.crabCmdOptions))
        res = crabCommand(options.crabCmd, task, *options.crabCmdOptions.split())

        # stop here if we're not checking the status
        if not checkingStatus:
            continue

        if 'failed' in res['jobsPerStatus']:
            # At least one failed job: count the task as FAILED so that it is
            # resubmitted. This mark must not be overwritten afterwards.
            tasksStatusDict[task] = 'FAILED'
        else:
            tasksStatusDict[task] = res['status']

        if tasksStatusDict[task] == 'COMPLETED' and task not in alreadyCached:
            # Open/close per write so the entry reaches the disk even if a
            # later crab call raises.
            with open(cacheFile, 'a') as ourCacheFile:
                ourCacheFile.write(task + '\n')
            alreadyCached.add(task)

    tasksFailed = _printSummary(tasksStatusDict)
    _resubmitFailedTasks(tasksFailed, not options.noAutoResubmit)


def _readCompletedTasksFromCache(cacheFile):
    """Return the set of task paths listed one per line in cacheFile (empty set if the file is absent)."""
    if not os.path.isfile(cacheFile):
        return set()
    with open(cacheFile, 'r') as cache:
        return set(line.strip() for line in cache if line.strip())


def _printSummary(tasksStatusDict):
    """Print the per-status task counts and return the list of tasks whose status is 'FAILED'."""
    totalTasks = len(tasksStatusDict)
    tasksCompleted = [t for t in tasksStatusDict if tasksStatusDict[t] == 'COMPLETED']
    tasksSubmitted = [t for t in tasksStatusDict if tasksStatusDict[t] == 'SUBMITTED']
    tasksFailed = [t for t in tasksStatusDict if tasksStatusDict[t] == 'FAILED']
    knownStatuses = ('COMPLETED', 'SUBMITTED', 'FAILED')
    tasksOther = [t for t in tasksStatusDict if tasksStatusDict[t] not in knownStatuses]

    print('')
    print('')
    print('SUMMARY')
    if tasksCompleted:
        print('Tasks completed: %d / %d' % (len(tasksCompleted), totalTasks))
    if tasksSubmitted:
        print('Tasks submitted: %d / %d' % (len(tasksSubmitted), totalTasks))
    if tasksFailed:
        print('Tasks failed: %d / %d' % (len(tasksFailed), totalTasks))
    if tasksOther:
        print('Tasks with other status: %d / %d' % (len(tasksOther), totalTasks))
        for task in tasksOther:
            print('\tTask: %s \tStatus: %s' % (task, tasksStatusDict[task]))
    return tasksFailed


def _resubmitFailedTasks(tasksFailed, autoResubmit):
    """Print the crab resubmit command for each failed task and run it when autoResubmit is true."""
    if not tasksFailed:
        return
    print('commands to resubmit failed tasks (or tasks with failed jobs):')
    for task in tasksFailed:
        # Keep the command as an argument list so that a task path containing
        # whitespace is passed to crab as a single argument.
        resubmitCmd = ['crab', 'resubmit', '--maxmemory=3000', task]
        print('')
        print('\t' + ' '.join(resubmitCmd))
        if autoResubmit:
            print('Automatically resubmitting...')
            returnCode = subprocess.call(resubmitCmd)
            if returnCode != 0:
                print('Resubmit command exited with status %d' % returnCode)
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()