forked from CMSLQ/submitJobsWithCrabV2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
85 lines (73 loc) · 3.63 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
def GetOutputDatasetTag(dataset):
    """Return the output dataset tag for *dataset*.

    This is element 0 of the tuple produced by
    GetOutputDatasetTagAndModifiedDatasetName.
    """
    datasetInfo = GetOutputDatasetTagAndModifiedDatasetName(dataset)
    return datasetInfo[0]
def GetModifiedDatasetName(dataset):
    """Return the modified dataset name for *dataset*.

    This is element 1 of the tuple produced by
    GetOutputDatasetTagAndModifiedDatasetName.
    """
    datasetInfo = GetOutputDatasetTagAndModifiedDatasetName(dataset)
    return datasetInfo[1]
def GetOutputDatasetTagAndModifiedDatasetName(dataset):
    """Derive the output dataset tag and a modified dataset name.

    The first character of *dataset* is dropped, every '/' is replaced by
    '__' and the result is split on '__'. The first two pieces are the
    primary and secondary dataset names, e.g.
    '/SinglePhoton/Run2015D-PromptReco-v3/MINIAOD' gives 'SinglePhoton' and
    'Run2015D-PromptReco-v3'.

    Args:
        dataset: dataset path string of the form '/primary/secondary/tier'.

    Returns:
        The tuple (outputDatasetTag, datasetName, primaryDatasetName,
        secondaryDatasetName, isData).

        * When 'Run20' occurs in 'primary__secondary' (isData is True) the
          tag is the secondary dataset name and datasetName is
          'primary__secondary'.
        * Otherwise the tag starts as 'LQ' and datasetName as the primary
          name; '_ext<N>', '_backup' and '-v2'/'-v1' markers found in
          *dataset* are appended to datasetName, and the tag is set by the
          last marker that matched.

    Raises:
        IndexError: if *dataset* yields fewer than two pieces.
    """
    pieces = dataset[1:].replace('/', '__').split('__')
    primaryDatasetName = pieces[0]
    secondaryDatasetName = pieces[1]
    # Keep only primary__secondary, i.e. drop the part after the last slash.
    combinedName = primaryDatasetName + '__' + secondaryDatasetName
    isData = 'Run20' in combinedName

    # NOTE(review): the branch layout below was reconstructed from code whose
    # indentation had been lost; the final `else` is taken to pair with
    # `if not isData` (the tag is "overridden for data") -- confirm.
    if isData:
        outputDatasetTag = secondaryDatasetName
        datasetName = combinedName
    else:
        outputDatasetTag = 'LQ'
        datasetName = primaryDatasetName
        # Handle the ext1 vs non-ext case specially. Look for '_ext' itself
        # rather than any 'ext' substring: with a bare 'ext' match, find()
        # returns -1 and an unrelated character was used as the number.
        extPos = dataset.find('_ext')
        if extPos != -1:
            # Single character following '_ext' (empty if '_ext' ends the string).
            extN = dataset[extPos + 4:extPos + 5]
            datasetName = datasetName + '_ext' + extN
            outputDatasetTag = 'LQ_ext' + extN
        if 'backup' in dataset:
            datasetName = datasetName + '_backup'
            outputDatasetTag = 'LQ_backup'
        # This is for DY 10-50 which has a v1 and v2, and an ext1.
        if '-v2' in dataset:
            datasetName = datasetName + '-v2'
            outputDatasetTag = 'LQ-v2'
        elif '-v1' in dataset:
            datasetName = datasetName + '-v1'
            outputDatasetTag = 'LQ-v1'
    return outputDatasetTag, datasetName, primaryDatasetName, secondaryDatasetName, isData
def GetOutputFilename(dataset, isMC):
    """Build the output ROOT file name for a dataset path.

    Candidate base names are the text between the first character of
    *dataset* and the '_Tune' marker, the same for the '_13TeV' marker, and
    the first '/'-separated component; the shortest candidate is used.

    Args:
        dataset: dataset path string of the form '/primary/secondary/tier'.
        isMC: when false, '_' plus the result of GetOutputDatasetTag(dataset)
            is appended to the base name.

    Returns:
        The base name, followed by '_ext<N>' if '_ext' occurs in *dataset*,
        '_backup' if 'backup' occurs in *dataset*, and finally '.root'.

    Raises:
        IndexError: if *dataset* contains no '/'.
    """
    candidates = []
    for marker in ('_Tune', '_13TeV'):
        cut = dataset.find(marker)
        # Skip absent markers: find() returns -1, and slicing with it would
        # silently produce the dataset minus its last character.
        if cut != -1:
            candidates.append(dataset[1:cut])
    candidates.append(dataset.split('/')[1])
    # Use the shortest candidate (the earliest one wins on ties).
    outputFile = min(candidates, key=len)
    if not isMC:
        outputFile = outputFile + '_' + GetOutputDatasetTag(dataset)
    # Look for '_ext' itself rather than any 'ext' substring, so that the
    # character read as the extension number really follows '_ext'.
    extPos = dataset.find('_ext')
    if extPos != -1:
        outputFile = outputFile + '_ext' + dataset[extPos + 4:extPos + 5]
    if 'backup' in dataset:
        outputFile = outputFile + '_backup'
    return outputFile + '.root'
def GetFileList(inputList):
    """Return the list of input file paths.

    Args:
        inputList: path of a text file holding one file path per line. When
            it is empty (or None) a single hard-coded fallback path is
            returned instead.

    Returns:
        A list of strings, each stripped of surrounding whitespace. Blank
        lines in the file are skipped so that no empty path is returned.
    """
    if not inputList:
        # Hard-coded fallback used when no list file is given.
        return ['/tmp/scooper/FF746568-EC2F-8E41-8BF2-840FA8E95F9A.root']
    with open(inputList, 'r') as listFile:
        stripped = (line.strip() for line in listFile)
        return [entry for entry in stripped if entry]