Skip to content

Commit

Permalink
Add task flow that starts from scratch.
Browse files Browse the repository at this point in the history
  • Loading branch information
riga committed Apr 17, 2018
1 parent 113aed8 commit b7a11f6
Show file tree
Hide file tree
Showing 14 changed files with 1,792 additions and 7 deletions.
10 changes: 10 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
BasedOnStyle: WebKit
IndentWidth: 4
ColumnLimit: 0
BreakBeforeBraces: Allman
AllowShortFunctionsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
NamespaceIndentation: None
---
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*.DS_Store
*.pkl
*.egg-info
*.root
MANIFEST
__pycache__
dist
Expand Down
65 changes: 65 additions & 0 deletions analysis/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@

import re
import os
import shutil

import law
import luigi

from analysis.util import calc_checksum
from analysis.config_2017 import analysis as analysis_2017, campaign as campaign_2017


Expand Down Expand Up @@ -63,6 +65,13 @@ def __init__(self, *args, **kwargs):
def store_parts(self):
    # start from the store parts of the parent class and append the dataset of this task
    parent_parts = super(DatasetTask, self).store_parts()
    return parent_parts + (self.dataset,)

def create_branch_map(self):
    # one entry per file of the dataset instance, i.e. the numbers 0 .. n_files - 1
    n_files = self.dataset_inst.n_files
    return list(range(n_files))

def glite_output_postfix(self):
    # NOTE(review): the return value is discarded; presumably this call is needed so that
    # start_branch and end_branch are resolved before they are read -- confirm
    self.get_branch_map()

    # build the postfix from the branch range of this task
    first, last = self.start_branch, self.end_branch
    return "_{}To{}".format(first, last)


class GridWorkflow(AnalysisTask, law.GLiteWorkflow):

Expand Down Expand Up @@ -118,8 +127,51 @@ def glite_job_config(self, config, job_num, branches):
return config


class InstallCMSSWCode(AnalysisTask):
    """
    Task that copies the CMSSW subsystems found below ``$JTSF_BASE/cmssw`` into
    ``$CMSSW_BASE/src`` and builds them with ``scram b``.

    The name of the output target contains a checksum of the ``$JTSF_BASE/cmssw`` directory, so a
    change of that code results in a different output target.
    """

    version = None

    def __init__(self, *args, **kwargs):
        super(InstallCMSSWCode, self).__init__(*args, **kwargs)

        # cache for the checksum property, computed on first access
        self._checksum = None

    @property
    def checksum(self):
        """
        Checksum of the ``$JTSF_BASE/cmssw`` directory as returned by ``calc_checksum``. Compiled
        python files, git files and "tmpfiles*" entries are excluded. The value is computed once
        and cached on the instance.
        """
        if self._checksum is None:
            path = os.path.join(os.getenv("JTSF_BASE"), "cmssw")
            self._checksum = calc_checksum(path, exclude=["*.pyc", "*.git*", "tmpfiles*"])

        return self._checksum

    def output(self):
        """
        Returns a local target named "<checksum>.txt" that acts as a flag for a finished
        installation.
        """
        return self.local_target("{}.txt".format(self.checksum))

    def run(self):
        """
        Replaces the subsystems in ``$CMSSW_BASE/src`` with the current code, runs ``scram b`` in
        that directory and finally touches the output target. Raises an *Exception* when the build
        exits with a non-zero code.
        """
        # the directory that receives the subsystems and in which the build is started, determined
        # once so that the build step does not depend on a variable leaking out of the loop below
        cmssw_src = os.path.join(os.getenv("CMSSW_BASE"), "src")

        # copy the current cmssw code to the CMSSW_BASE directory
        for subsystem in ["jet_tagging_sf"]:
            src = os.path.join(os.getenv("JTSF_BASE"), "cmssw", subsystem)
            dst = os.path.join(cmssw_src, subsystem)
            # copytree requires a non-existing destination, so remove a previous copy first
            if os.path.exists(dst):
                shutil.rmtree(dst)
            shutil.copytree(src, dst)

        # install the software
        code = law.util.interruptable_popen("scram b", shell=True, executable="/bin/bash",
            cwd=cmssw_src)[0]
        if code != 0:
            raise Exception("scram build failed")

        # touch the flag output file
        # NOTE(review): presumably the parent directory is created with mode 0770 and the checksum
        # is written as the file content -- confirm against the law target API
        output = self.output()
        output.parent.touch(0o0770)
        output.touch(self.checksum)


class UploadCMSSW(AnalysisTask, law.BundleCMSSW, law.TransferLocalFile):

force_upload = luigi.BoolParameter(default=False, description="force uploading")

# settings for BundleCMSSW
cmssw_path = os.getenv("CMSSW_BASE")

Expand All @@ -129,6 +181,17 @@ class UploadCMSSW(AnalysisTask, law.BundleCMSSW, law.TransferLocalFile):
version = None
task_namespace = None

# constructor of UploadCMSSW, forwards all arguments to the parent classes
def __init__(self, *args, **kwargs):
super(UploadCMSSW, self).__init__(*args, **kwargs)

# flag that is set to True at the end of run() and that is returned by complete()
# when force_upload is set
self.has_run = False

def complete(self):
    # without a forced upload, use the completeness check of the parent classes
    if not self.force_upload:
        return super(UploadCMSSW, self).complete()

    # with a forced upload, the task only counts as complete after run() set the flag
    return self.has_run

def single_output(self):
    # the remote file is named after the last path component of cmssw_path, plus ".tgz"
    cmssw_name = os.path.basename(self.cmssw_path)
    return self.wlcg_target("{}.tgz".format(cmssw_name))
Expand All @@ -141,6 +204,8 @@ def run(self):
self.bundle(bundle)
self.transfer(bundle)

self.has_run = True


class UploadSoftware(AnalysisTask, law.TransferLocalFile):

Expand Down
13 changes: 12 additions & 1 deletion analysis/config_2017.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


# campaign
campaign = od.Campaign("2017_13Tev_25ns", 1, ecm=13, bx=25)
campaign = od.Campaign("2017_13Tev_25ns", 1, ecm=13000, bx=25)

# processes
process_data_ee = od.Process(
Expand All @@ -31,6 +31,7 @@
# link processes to datasets
dataset_data_ee.add_process(process_data_ee)


# add the analysis and a config for the 2017 campaign
analysis = od.Analysis("jet-tagging-sf", 1)
cfg = analysis.add_config(campaign=campaign)
Expand All @@ -40,3 +41,13 @@

# add datasets
cfg.add_dataset(dataset_data_ee)

# auxiliary data
# global tags stored on the config under "global_tag", one entry each for the keys
# "data" and "mc"
cfg.set_aux("global_tag", {
"data": "94X_dataRun2_ReReco_EOY17_v6",
"mc": "94X_mc2017_realistic_v13",
})

# lumi and normtag file
# both are stored as absolute paths below /afs/cern.ch
# NOTE(review): presumably they are only readable where AFS is mounted -- confirm
cfg.set_aux("lumi_file", "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/ReReco/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt")
cfg.set_aux("normtag_file", "/afs/cern.ch/user/l/lumipro/public/Normtags/normtag_PHYSICS.json")
181 changes: 181 additions & 0 deletions analysis/csvTreeMaker_cfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-

"""
Config file to create CSV SF tuples.
"""


import os

import FWCore.ParameterSet.Config as cms
from FWCore.PythonUtilities.LumiList import LumiList
from FWCore.ParameterSet.VarParsing import VarParsing


try:
    # create the option parser
    options = VarParsing("python")

    # overwrite the defaults of some common options
    for _name, _default in [
        ("inputFiles", "root://xrootd-cms.infn.it//store/data/Run2017B/DoubleEG/MINIAOD/17Nov2017-v1/20000/065312BE-A3D5-E711-A0C7-0CC47A1E0DCC.root"),
        ("outputFile", "output.root"),
        ("maxEvents", 100),
    ]:
        options.setDefault(_name, _default)

    # custom options, registered in the order of the table below, each entry being
    # (name, default, multiplicity, type, description)
    _single = VarParsing.multiplicity.singleton
    _multi = VarParsing.multiplicity.list
    _str = VarParsing.varType.string
    _int = VarParsing.varType.int
    _bool = VarParsing.varType.bool
    for _spec in [
        ("globalTag", "", _single, _str, "the global tag to use"),
        ("lumiFile", "", _single, _str, "file for selecting runs and lumis"),
        ("isData", False, _single, _bool, "input dataset contains real data"),
        ("triggers", [], _multi, _str, "triggers to use"),
        ("metFilters", [], _multi, _str, "MET filters to use"),
        ("jesFiles", [], _multi, _str, "txt files containing jes infos"),
        ("jesRanges", [], _multi, _int, "a flat list of range pairs"),
        ("jesUncFiles", [], _multi, _str,
            "txt files containing the combined jes uncertainty infos"),
        ("jesUncSrcFile", "", _single, _str,
            "txt file containing the per-source jes uncertainty infos"),
        ("jesUncSources", [], _multi, _str, "jes uncertainty sources to consider"),
        ("reportEvery", 1000, _single, _int,
            "number of events after which a report message is written"),
        ("summary", False, _single, _bool, "print a summary at the end?"),
    ]:
        options.register(*_spec)
    options.parseArguments()

    # the process, and a (so far empty) sequence for additional modules
    process = cms.Process("CSVSF")
    seq = cms.Sequence()

    # default input collections, the process label depends on whether data is processed
    defaultProcess = "PAT"
    if options.isData:
        defaultProcess = "RECO"
    electronCollection = cms.InputTag("slimmedElectrons", "", defaultProcess)
    muonCollection = cms.InputTag("slimmedMuons", "", defaultProcess)
    metCollection = cms.InputTag("slimmedMETs", "", defaultProcess)
    jetCollection = cms.InputTag("slimmedJets", "", defaultProcess)

    # message logger with configurable report frequency
    process.load("FWCore.MessageLogger.MessageLogger_cfi")
    process.MessageLogger.cerr.FwkReport.reportEvery = options.reportEvery

    # source definition
    process.source = cms.Source("PoolSource", fileNames=cms.untracked.vstring(options.inputFiles))

    # good run and lumi selection, only for data and only when a lumi file is given
    if options.isData and options.lumiFile:
        lumi_list = LumiList(filename=options.lumiFile)
        process.source.lumisToProcess = lumi_list.getVLuminosityBlockRange()

    # standard sequences with global tag, only when a global tag is given
    if options.globalTag:
        process.load("Configuration.StandardSequences.FrontierConditions_GlobalTag_cff")
        process.GlobalTag.globaltag = options.globalTag

    # geometry sequences
    process.load("Configuration.StandardSequences.GeometryDB_cff")

    # particle data table
    process.load("SimGeneral.HepPDTESSource.pythiapdt_cfi")

    # tfile service writing to the configured output file
    output_file = options.__getattr__("outputFile", noTags=True)
    process.TFileService = cms.Service("TFileService", fileName=cms.string(output_file))

    # load the csv tree maker and set its parameters in the order of the table below
    process.load("jet_tagging_sf.jet_tagging_sf.csvTreeMaker_cfi")
    for _attr, _value in [
        ("isData", cms.bool(options.isData)),
        ("verbose", cms.untracked.bool(True)),
        ("triggers", cms.vstring(options.triggers)),
        ("metFilters", cms.vstring(options.metFilters)),
        ("jesFiles", cms.vstring(options.jesFiles)),
        ("jesRanges", cms.vint32(options.jesRanges)),
        ("jesUncFiles", cms.vstring(options.jesUncFiles)),
        ("jesUncSrcFile", cms.string(options.jesUncSrcFile)),
        ("jesUncSources", cms.vstring(options.jesUncSources)),
        ("electronCollection", electronCollection),
        ("muonCollection", muonCollection),
        ("metCollection", metCollection),
        ("jetCollection", jetCollection),
        ("rhoCollection", cms.InputTag("fixedGridRhoFastjetAll")),
        ("eleVIDCollection",
            cms.InputTag("egmGsfElectronIDs:cutBasedElectronID-Fall17-94X-V1-tight")),
    ]:
        setattr(process.csvTreeMaker, _attr, _value)

    # additional configuration
    process.maxEvents = cms.untracked.PSet(input=cms.untracked.int32(options.maxEvents))

    process.options = cms.untracked.PSet(
        allowUnscheduled=cms.untracked.bool(True),
        wantSummary=cms.untracked.bool(options.summary),
    )

    # tell the process what to run
    process.p = cms.Path(seq + process.csvTreeMaker)

except:
    # print the traceback explicitly before the error is propagated
    import traceback
    traceback.print_exc()
    raise
Loading

0 comments on commit b7a11f6

Please sign in to comment.