Add task flow that starts from scratch.

cms-btv-pog · Apr 17, 2018 · b7a11f6 · b7a11f6
1 parent 113aed8
commit b7a11f6
Show file tree

Hide file tree

Showing 14 changed files with 1,792 additions and 7 deletions.
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,10 @@
+---
+BasedOnStyle: WebKit
+IndentWidth: 4
+ColumnLimit: 0
+BreakBeforeBraces: Allman
+AllowShortFunctionsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+NamespaceIndentation: None
+---
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@
 *.DS_Store
 *.pkl
 *.egg-info
+*.root
 MANIFEST
 __pycache__
 dist

diff --git a/analysis/base.py b/analysis/base.py
@@ -6,10 +6,12 @@
 
 import re
 import os
+import shutil
 
 import law
 import luigi
 
+from analysis.util import calc_checksum
 from analysis.config_2017 import analysis as analysis_2017, campaign as campaign_2017
 
 
@@ -63,6 +65,13 @@ def __init__(self, *args, **kwargs):
     def store_parts(self):
         return super(DatasetTask, self).store_parts() + (self.dataset,)
 
+    def create_branch_map(self):
+        """
+        Return the list [0, 1, ..., n_files - 1], built from the *n_files* attribute of
+        *dataset_inst*.
+        """
+        n_files = self.dataset_inst.n_files
+        indices = range(n_files)
+        return list(indices)
+
+    def glite_output_postfix(self):
+        """
+        Return the postfix "_<start_branch>To<end_branch>" built from the current values of the
+        *start_branch* and *end_branch* attributes.
+        """
+        # the return value is intentionally dropped
+        # NOTE(review): presumably this call makes sure start_branch / end_branch are resolved
+        # before they are read below - confirm against law's workflow implementation
+        self.get_branch_map()
+
+        first, last = self.start_branch, self.end_branch
+        return "_{}To{}".format(first, last)
+
 
 class GridWorkflow(AnalysisTask, law.GLiteWorkflow):
 
@@ -118,8 +127,51 @@ def glite_job_config(self, config, job_num, branches):
         return config
 
 
+class InstallCMSSWCode(AnalysisTask):
+    """
+    Task that copies the cmssw code found below $JTSF_BASE/cmssw into $CMSSW_BASE/src and builds
+    it with "scram b". The output is a local flag file named after the checksum of the copied
+    code.
+    """
+
+    version = None
+
+    def __init__(self, *args, **kwargs):
+        super(InstallCMSSWCode, self).__init__(*args, **kwargs)
+
+        # computed lazily by the checksum property
+        self._checksum = None
+
+    @property
+    def checksum(self):
+        """
+        Checksum of the $JTSF_BASE/cmssw directory as returned by *calc_checksum*, called with the
+        exclude patterns "*.pyc", "*.git*" and "tmpfiles*". The value is computed on first access
+        and cached on the instance afterwards.
+        """
+        if self._checksum is None:
+            path = os.path.join(os.getenv("JTSF_BASE"), "cmssw")
+            self._checksum = calc_checksum(path, exclude=["*.pyc", "*.git*", "tmpfiles*"])
+
+        return self._checksum
+
+    def output(self):
+        """
+        Return the local target "<checksum>.txt".
+        """
+        return self.local_target("{}.txt".format(self.checksum))
+
+    def run(self):
+        """
+        Copy all subsystems into $CMSSW_BASE/src (replacing existing copies), run "scram b" in
+        that directory and touch the flag output file. An *Exception* is raised when the build
+        returns a non-zero exit code.
+        """
+        # resolve the src directory once, it is both the copy destination and the build directory,
+        # so the build below does not depend on a variable that only exists inside the loop
+        src_dir = os.path.join(os.getenv("CMSSW_BASE"), "src")
+
+        # copy the current cmssw code to the CMSSW_BASE directory
+        for subsystem in ["jet_tagging_sf"]:
+            src = os.path.join(os.getenv("JTSF_BASE"), "cmssw", subsystem)
+            dst = os.path.join(src_dir, subsystem)
+            if os.path.exists(dst):
+                shutil.rmtree(dst)
+            shutil.copytree(src, dst)
+
+        # install the software
+        code = law.util.interruptable_popen("scram b", shell=True, executable="/bin/bash",
+            cwd=src_dir)[0]
+        if code != 0:
+            raise Exception("scram build failed")
+
+        # touch the flag output file
+        output = self.output()
+        output.parent.touch(0o0770)
+        output.touch(self.checksum)
+
+
 class UploadCMSSW(AnalysisTask, law.BundleCMSSW, law.TransferLocalFile):
 
+    force_upload = luigi.BoolParameter(default=False, description="force uploading")
+
     # settings for BundleCMSSW
     cmssw_path = os.getenv("CMSSW_BASE")
 
@@ -129,6 +181,17 @@ class UploadCMSSW(AnalysisTask, law.BundleCMSSW, law.TransferLocalFile):
     version = None
     task_namespace = None
 
+    def __init__(self, *args, **kwargs):
+        super(UploadCMSSW, self).__init__(*args, **kwargs)
+
+        self.has_run = False
+
+    def complete(self):
+        """
+        Completeness check. When *force_upload* is set, the task only counts as complete once the
+        *has_run* flag of this instance is set. Otherwise, the check of the base classes is used.
+        """
+        if not self.force_upload:
+            return super(UploadCMSSW, self).complete()
+
+        return self.has_run
+
     def single_output(self):
         path = "{}.tgz".format(os.path.basename(self.cmssw_path))
         return self.wlcg_target(path)
@@ -141,6 +204,8 @@ def run(self):
         self.bundle(bundle)
         self.transfer(bundle)
 
+        self.has_run = True
+
 
 class UploadSoftware(AnalysisTask, law.TransferLocalFile):
 

diff --git a/analysis/config_2017.py b/analysis/config_2017.py
@@ -9,7 +9,7 @@
 
 
 # campaign
-campaign = od.Campaign("2017_13Tev_25ns", 1, ecm=13, bx=25)
+campaign = od.Campaign("2017_13Tev_25ns", 1, ecm=13000, bx=25)
 
 # processes
 process_data_ee = od.Process(
@@ -31,6 +31,7 @@
 # link processes to datasets
 dataset_data_ee.add_process(process_data_ee)
 
+
 # add the analysis and a config for the 2017 campaign
 analysis = od.Analysis("jet-tagging-sf", 1)
 cfg = analysis.add_config(campaign=campaign)
@@ -40,3 +41,13 @@
 
 # add datasets
 cfg.add_dataset(dataset_data_ee)
+
+# auxiliary data
+cfg.set_aux("global_tag", {
+    "data": "94X_dataRun2_ReReco_EOY17_v6",
+    "mc": "94X_mc2017_realistic_v13",
+})
+
+# lumi and normtag file
+cfg.set_aux("lumi_file", "/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions17/13TeV/ReReco/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt")
+cfg.set_aux("normtag_file", "/afs/cern.ch/user/l/lumipro/public/Normtags/normtag_PHYSICS.json")
diff --git a/analysis/csvTreeMaker_cfg.py b/analysis/csvTreeMaker_cfg.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+
+"""
+Config file to create CSV SF tuples.
+"""
+
+
+import os
+
+import FWCore.ParameterSet.Config as cms
+from FWCore.PythonUtilities.LumiList import LumiList
+from FWCore.ParameterSet.VarParsing import VarParsing
+
+
+try:
+    # create options
+    options = VarParsing("python")
+
+    # set defaults of common options
+    options.setDefault("inputFiles", "root://xrootd-cms.infn.it//store/data/Run2017B/DoubleEG/MINIAOD/17Nov2017-v1/20000/065312BE-A3D5-E711-A0C7-0CC47A1E0DCC.root")
+    options.setDefault("outputFile", "output.root")
+    options.setDefault("maxEvents", 100)
+
+    # add custom options
+    options.register(
+        "globalTag",
+        "",
+        VarParsing.multiplicity.singleton,
+        VarParsing.varType.string,
+        "the global tag to use",
+    )
+    options.register(
+        "lumiFile",
+        "",
+        VarParsing.multiplicity.singleton,
+        VarParsing.varType.string,
+        "file for selecting runs and lumis",
+    )
+    options.register(
+        "isData",
+        False,
+        VarParsing.multiplicity.singleton,
+        VarParsing.varType.bool,
+        "input dataset contains real data",
+    )
+    options.register(
+        "triggers",
+        [],
+        VarParsing.multiplicity.list,
+        VarParsing.varType.string,
+        "triggers to use",
+    )
+    options.register(
+        "metFilters",
+        [],
+        VarParsing.multiplicity.list,
+        VarParsing.varType.string,
+        "MET filters to use",
+    )
+    options.register(
+        "jesFiles",
+        [],
+        VarParsing.multiplicity.list,
+        VarParsing.varType.string,
+        "txt files containing jes infos",
+    )
+    options.register(
+        "jesRanges",
+        [],
+        VarParsing.multiplicity.list,
+        VarParsing.varType.int,
+        "a flat list of range pairs",
+    )
+    options.register(
+        "jesUncFiles",
+        [],
+        VarParsing.multiplicity.list,
+        VarParsing.varType.string,
+        "txt files containing the combined jes uncertainty infos",
+    )
+    options.register(
+        "jesUncSrcFile",
+        "",
+        VarParsing.multiplicity.singleton,
+        VarParsing.varType.string,
+        "txt file containing the per-source jes uncertainty infos",
+    )
+    options.register(
+        "jesUncSources",
+        [],
+        VarParsing.multiplicity.list,
+        VarParsing.varType.string,
+        "jes uncertainty sources to consider",
+    )
+    options.register(
+        "reportEvery",
+        1000,
+        VarParsing.multiplicity.singleton,
+        VarParsing.varType.int,
+        "number of events after which a report message is written",
+    )
+    options.register(
+        "summary",
+        False,
+        VarParsing.multiplicity.singleton,
+        VarParsing.varType.bool,
+        "print a summary at the end?",
+    )
+    options.parseArguments()
+
+    # create the process and a sequence for additional modules
+    process = cms.Process("CSVSF")
+    seq = cms.Sequence()
+
+    # some default collections
+    defaultProcess = "RECO" if options.isData else "PAT"
+    electronCollection = cms.InputTag("slimmedElectrons", "", defaultProcess)
+    muonCollection = cms.InputTag("slimmedMuons", "", defaultProcess)
+    metCollection = cms.InputTag("slimmedMETs", "", defaultProcess)
+    jetCollection = cms.InputTag("slimmedJets", "", defaultProcess)
+
+    # message logger
+    process.load("FWCore.MessageLogger.MessageLogger_cfi")
+    process.MessageLogger.cerr.FwkReport.reportEvery = options.reportEvery
+
+    # source definition
+    process.source = cms.Source("PoolSource", fileNames=cms.untracked.vstring(options.inputFiles))
+
+    # good run and lumi selection
+    if options.isData and options.lumiFile:
+        lumi_list = LumiList(filename=options.lumiFile)
+        process.source.lumisToProcess = lumi_list.getVLuminosityBlockRange()
+
+    # standard sequences with global tag
+    if options.globalTag:
+        process.load("Configuration.StandardSequences.FrontierConditions_GlobalTag_cff")
+        process.GlobalTag.globaltag = options.globalTag
+
+    # geometry sequences
+    process.load("Configuration.StandardSequences.GeometryDB_cff")
+
+    # particle data table
+    process.load("SimGeneral.HepPDTESSource.pythiapdt_cfi")
+
+    # configure the tfile service
+    output_file = options.__getattr__("outputFile", noTags=True)
+    process.TFileService = cms.Service("TFileService", fileName=cms.string(output_file))
+
+    # load and configure the csv tree maker
+    process.load("jet_tagging_sf.jet_tagging_sf.csvTreeMaker_cfi")
+    process.csvTreeMaker.isData = cms.bool(options.isData)
+    process.csvTreeMaker.verbose = cms.untracked.bool(True)
+    process.csvTreeMaker.triggers = cms.vstring(options.triggers)
+    process.csvTreeMaker.metFilters = cms.vstring(options.metFilters)
+    process.csvTreeMaker.jesFiles = cms.vstring(options.jesFiles)
+    process.csvTreeMaker.jesRanges = cms.vint32(options.jesRanges)
+    process.csvTreeMaker.jesUncFiles = cms.vstring(options.jesUncFiles)
+    process.csvTreeMaker.jesUncSrcFile = cms.string(options.jesUncSrcFile)
+    process.csvTreeMaker.jesUncSources = cms.vstring(options.jesUncSources)
+    process.csvTreeMaker.electronCollection = electronCollection
+    process.csvTreeMaker.muonCollection = muonCollection
+    process.csvTreeMaker.metCollection = metCollection
+    process.csvTreeMaker.jetCollection = jetCollection
+    process.csvTreeMaker.rhoCollection = cms.InputTag("fixedGridRhoFastjetAll")
+    process.csvTreeMaker.eleVIDCollection = cms.InputTag("egmGsfElectronIDs:cutBasedElectronID-Fall17-94X-V1-tight")
+
+    # additional configuration
+    process.maxEvents = cms.untracked.PSet(input=cms.untracked.int32(options.maxEvents))
+
+    process.options = cms.untracked.PSet(
+        allowUnscheduled=cms.untracked.bool(True),
+        wantSummary=cms.untracked.bool(options.summary),
+    )
+
+    # tell the process what to run
+    process.p = cms.Path(seq + process.csvTreeMaker)
+
+except:
+    import traceback
+    traceback.print_exc()
+    raise