Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: lcorcodilos/TIMBER
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: ammitra/TIMBER
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Able to merge. These branches can be automatically merged.
  • 17 commits
  • 8 files changed
  • 2 contributors

Commits on Jun 15, 2022

  1. Copy the full SHA
    71b53c6 View commit details

Commits on Jun 20, 2022

  1. Copy the full SHA
    372bc21 View commit details

Commits on Jun 21, 2022

  1. Copy the full SHA
    78121e2 View commit details
  2. add explanatory comment

    ammitra committed Jun 21, 2022
    Copy the full SHA
    eb22a4a View commit details
  3. Copy the full SHA
    4ed805b View commit details

Commits on Aug 17, 2022

  1. Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature.
    Copy the full SHA
    ce2511c View commit details

Commits on Aug 19, 2022

  1. fix missing namespaces

    ammitra committed Aug 19, 2022
    Copy the full SHA
    3f28681 View commit details
  2. fix missing math namespace

    ammitra committed Aug 19, 2022

    Verified

    This commit was signed with the committer’s verified signature.
    pracucci Marco Pracucci
    Copy the full SHA
    a9744e7 View commit details
  3. Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature.
    Copy the full SHA
    43ff905 View commit details

Commits on Dec 16, 2022

  1. Merge pull request #1 from ammitra/skipEmpty

    QoL updates
    ammitra authored Dec 16, 2022
    Copy the full SHA
    71e68b2 View commit details

Commits on Dec 20, 2022

  1. Copy the full SHA
    989c013 View commit details

Commits on Dec 23, 2022

  1. minor fixes

    ammitra committed Dec 23, 2022
    Copy the full SHA
    5e15384 View commit details

Commits on Jan 10, 2023

  1. Copy the full SHA
    12a508f View commit details

Commits on Mar 1, 2023

  1. Update README.md

    point to *my* repo
    ammitra authored Mar 1, 2023
    Copy the full SHA
    a5f761c View commit details

Commits on Mar 22, 2023

  1. Copy the full SHA
    dcae53d View commit details

Commits on May 22, 2023

  1. Copy the full SHA
    9e91aa3 View commit details
  2. Merge pull request #4 from ammitra/skipEmpty

    skip empty files
    ammitra authored May 22, 2023
    Copy the full SHA
    9653ae9 View commit details
Showing with 192 additions and 26 deletions.
  1. +2 −2 README.md
  2. +51 −15 TIMBER/Analyzer.py
  3. +39 −0 TIMBER/Framework/TopPhi_modules/BranchCorrection.cc
  4. +34 −0 TIMBER/Framework/include/EffLoader_2Dfunc.h
  5. +28 −0 TIMBER/Framework/src/EffLoader_2Dfunc.cc
  6. +25 −1 TIMBER/Tools/Common.py
  7. +12 −7 TIMBER/Tools/Plot.py
  8. +1 −1 setup.sh
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@ you're obviously free to use your favorite tool for the job (you can install vir
```
python -m virtualenv timber-env
source timber-env/bin/activate
git clone https://github.com/lcorcodilos/TIMBER.git
git clone https://github.com/ammitra/TIMBER.git
cd TIMBER
source setup.sh
```
@@ -78,4 +78,4 @@ which access scale factors, calculate pileup weights, and more. These are all wr
in C++ for use in `Cut` and `Define` arguments and are provided so that users have a common tool box to share.
Additionally, the AnalysisModules folder welcomes additions of custom C++ modules on a
per-analysis basis so that the code can be properly archived for future reference and for sharing
with other analyzers.
with other analyzers.
66 changes: 51 additions & 15 deletions TIMBER/Analyzer.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
"""

from TIMBER.CollectionOrganizer import CollectionOrganizer
from TIMBER.Tools.Common import GenerateHash, GetHistBinningTuple, CompileCpp, ConcatCols, GetStandardFlags, ExecuteCmd, LoadColumnNames
from TIMBER.Tools.Common import GenerateHash, GetHistBinningTuple, CompileCpp, ConcatCols, GetStandardFlags, ExecuteCmd, LoadColumnNames, ProgressBar
from clang import cindex
from collections import OrderedDict

@@ -30,7 +30,7 @@ class analyzer(object):
When using class functions to perform actions, an active node will always be tracked so that the next action uses
the active node and assigns the output node as the new #ActiveNode"""
def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampleStr=''):
def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampleStr='',skipEmpty=True):
"""Constructor.
Sets up the tracking of actions on an RDataFrame as nodes. Also
@@ -46,6 +46,9 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
@param multiSampleStr (str, optional): If a sample was generated with multiple mass points,
define the mass which you'd like to analyze in this string. If you're unsure of your options, check the Runs TTree
for a branch `genEventSumw_YMass_<mass>`. Defaults to '' which will load `genEventSumw_`.
@param skipEmpty (bool): If the ROOT file(s) opened for processing by the analyzer have an empty Events TTree, then skip them.
By default, this is set to True, and a warning will be issued to the user if they do not wish to skip files with empty
Events trees.
"""

## @var fileName
@@ -97,6 +100,7 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
self._eventsTreeName = eventsTreeName
self._runTreeName = runTreeName
self.silent = False
self.skipEmpty = skipEmpty
if multiSampleStr != '':
multiSampleStr = 'YMass_%s'%multiSampleStr
genEventSumw_str = 'genEventSumw_'+multiSampleStr
@@ -105,13 +109,18 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
# Setup TChains for multiple or single file
self._eventsChain = ROOT.TChain(self._eventsTreeName)
self.RunChain = ROOT.TChain(runTreeName)
print ('Opening files...')
if isinstance(self.fileName,list):
for f in self.fileName:
self._addFile(f)
if isinstance(self.fileName,list): # assumes list of line-separated .root files
for f in ProgressBar(self.fileName, "Opening files: "):
self._addFile(f)
else:
self._addFile(self.fileName)

if not self.fileName.endswith(".txt"):
print("Opening file...")
self._addFile(self.fileName)
else: # opening .txt file containing line-separated .root filenames
fNames = self._parseTxt(self.fileName)
for f in ProgressBar(fNames, "Opening files: "):
self._addFile(f)

# Make base RDataFrame
BaseDataFrame = ROOT.RDataFrame(self._eventsChain)
self.BaseNode = Node('base',BaseDataFrame)
@@ -170,7 +179,14 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
for f in glob.glob(os.environ["TIMBERPATH"]+'TIMBER/Framework/include/*.h'):
if f.split('/')[-1] in skipHeaders: continue
CompileCpp('#include "%s"\n'%f)


def _parseTxt(self,f):
'''Parse .txt file and return list of all lines in it
@param f (str): .txt filename
'''
txt_file = open(f,"r")
return [l.strip() for l in txt_file.readlines()]

def _addFile(self,f):
'''Add file to TChains being tracked.
@@ -180,10 +196,25 @@ def _addFile(self,f):
if f.endswith(".root"):
if 'root://' not in f and f.startswith('/store/'):
f='root://cms-xrd-global.cern.ch/'+f
self._eventsChain.Add(f)
#self._eventsChain.Add(f)
if ROOT.TFile.Open(f,'READ') == None:
raise ReferenceError('File %s does not exist'%f)
raise ReferenceError('File %s does not exist'%f)
tempF = ROOT.TFile.Open(f,'READ')
# Check if Events tree name is in the file
existingTrees = tempF.GetListOfKeys()
treeNames = [i.GetName() for i in existingTrees]
if self._eventsTreeName not in treeNames:
print('WARNING: The following file does NOT contain an Events TTree, skipping.\n\tFile: {}'.format(f))
pass
elif tempF.Get(self._eventsTreeName).GetEntry() != 0:
self._eventsChain.Add(f)
elif tempF.Get(self._eventsTreeName).GetEntry() == 0:
if self.skipEmpty:
print("WARNING: The following file contains an empty Events TTree, skipping. If you wish to add regardless, please call the analyzer with 'skipEmpty=False'\n\tFile: {}".format(f))
pass
else:
print("WARNING: The following file contains an empty Events TTree, adding to analyzer regardless. If you wish to skip, please call analyzer with 'skipEmpty=True' (default).\n\tFile: {}".format(f))
self._eventsChain.Add(f)
if tempF.Get(self._runTreeName) != None:
self.RunChain.Add(f)
tempF.Close()
@@ -388,8 +419,6 @@ def FilterColumnNames(self,columns,node=None):
out = []
for i in columns:
if i in cols_in_node: out.append(i)
else: print ("WARNING: Column %s not found and will be dropped."%i)

return out

def GetTriggerString(self,trigList):
@@ -632,22 +661,29 @@ def ReorderCollection(self, name, basecoll, newOrderCol, skip=[]):
'''
return self.SubCollection(name, basecoll, newOrderCol, skip)

def ObjectFromCollection(self,name,basecoll,index,skip=[]):
def ObjectFromCollection(self,name,basecoll,index,skip=[],strict=True):
'''Similar to creating a SubCollection except the newly defined columns
are single values (not vectors/arrays) for the object at the provided index.
@param name (str): Name of new collection.
@param basecoll (str): Name of derivative collection.
@param index (str): Index of the collection item to extract.
@param skip ([str]): List of variable names in the collection to skip.
@param strict (bool): Whether or not to require strict definitions. I.e., if
trying to derive a new collection from "Jet" base collection, then strict
definitions would ensure only the "Jet" collections are renamed, not any
column including the word "Jet".
Returns:
None. New nodes created with the sub collection.
Example:
ObjectFromCollection('LeadJet','FatJet','0')
'''
collBranches = [str(cname) for cname in self.DataFrame.GetColumnNames() if ( (basecoll in str(cname)) and (str(cname) not in skip))]
if not strict:
collBranches = [str(cname) for cname in self.DataFrame.GetColumnNames() if ( (basecoll in str(cname)) and (str(cname) not in skip))]
else:
collBranches = [str(cname) for cname in self.DataFrame.GetColumnNames() if ( (basecoll == str(cname)[:len(basecoll)]) and (str(cname) not in skip))]
for b in collBranches:
replacementName = b.replace(basecoll,name)
if b == 'n'+basecoll:
39 changes: 39 additions & 0 deletions TIMBER/Framework/TopPhi_modules/BranchCorrection.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include <ROOT/RVec.hxx>
/**
* @class BranchCorrection
* @brief Trivial class to load a branch as correction in TIMBER.
Taken from https://github.com/mroguljic/TIMBER/blob/Zbb_branch_py3/TIMBER/Framework/Zbb_modules/BranchCorrection.cc
*/
using namespace ROOT::VecOps;
class BranchCorrection {
    public:
        BranchCorrection() {}
        ~BranchCorrection() {}

        /** Wrap a single value into a one-element vector: {val}. */
        RVec<float> evalCorrection(float val);
        /** Pack nominal/up/down values into a three-element vector: {val, valUp, valDown}. */
        RVec<float> evalWeight(float val, float valUp, float valDown);
        /** Pack up/down values into a two-element vector: {valUp, valDown}. */
        RVec<float> evalUncert(float valUp, float valDown);
};

// Each method only repackages its arguments, in argument order, into an RVec<float>;
// no arithmetic is applied to the inputs.

RVec<float> BranchCorrection::evalCorrection(float val) {
    return RVec<float>{val};
}

RVec<float> BranchCorrection::evalWeight(float val, float valUp, float valDown) {
    return RVec<float>{val, valUp, valDown};
}

RVec<float> BranchCorrection::evalUncert(float valUp, float valDown) {
    return RVec<float>{valUp, valDown};
}
34 changes: 34 additions & 0 deletions TIMBER/Framework/include/EffLoader_2Dfunc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#ifndef _TIMBER_EFFLOADER_2DFUNC
#define _TIMBER_EFFLOADER_2DFUNC
#include <string>
#include "TFile.h"
#include "TEfficiency.h"
#include "TF2.h"
#include "TFitResultPtr.h"
#include "TFitResult.h"

/**
 * Class for handling efficiencies whose uncertainties are being evaluated via a fitted function.
 * A ROOT file containing the TEfficiency, TH2, TF2, and TFitResultPtr from the fit of the TF2 to
 * the TEfficiency must be passed to the class constructor.
 * NOTE: fitting a 2D TEfficiency only works in ROOT versions >6.28, so the fitting may have
 * to be done outside of CMSSW and the TF2 and TFitResultPtr saved to the file externally.
 *
 * All members are given default values so that an object created with the default
 * constructor holds null pointers instead of indeterminate ones.
 *
 * NOTE(review): std::vector is used in the eval() declaration but <vector> is not
 * included directly by this header; it is presumably pulled in through the ROOT
 * headers - confirm.
 */

class EffLoader_2Dfunc {
    private:
        TFile *file = nullptr;               // input ROOT file, opened by the constructor
        TEfficiency *efficiency = nullptr;   // efficiency object read from the file
        TF2 *func = nullptr;                 // fitted 2D function read from the file
        TFitResultPtr *resultPtr = nullptr;  // fit result read from the file
        float effval = 0.f;                  // nominal value from the most recent eval()
        float effup = 0.f;                   // up variation from the most recent eval()
        float effdown = 0.f;                 // down variation from the most recent eval()

    public:
        /** Default constructor: loads nothing. */
        EffLoader_2Dfunc();
        /**
         * @param filename Path of the ROOT file to open.
         * @param funcname Name of the TF2 stored in the file.
         * @param effname  Name of the TEfficiency stored in the file.
         */
        EffLoader_2Dfunc(std::string filename, std::string funcname, std::string effname);
        /**
         * @param xval x coordinate at which to evaluate.
         * @param yval y coordinate at which to evaluate.
         * @return {nominal, up, down} efficiency values.
         */
        std::vector<float> eval(float xval, float yval);
};

#endif
28 changes: 28 additions & 0 deletions TIMBER/Framework/src/EffLoader_2Dfunc.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "../include/EffLoader_2Dfunc.h"

// Default constructor: performs no loading.
EffLoader_2Dfunc::EffLoader_2Dfunc() {}

// Open `filename` and fetch the objects used by eval():
//   - the TEfficiency stored under `effname`
//   - the TF2 stored under `funcname`
//   - the fit result stored under "TBinomialEfficiencyFitter_result_of_<funcname>"
// No check is made that the file opened or that the keys exist.
EffLoader_2Dfunc::EffLoader_2Dfunc(std::string filename, std::string funcname, std::string effname) {
    const std::string resultKey = "TBinomialEfficiencyFitter_result_of_" + funcname;

    file = TFile::Open(filename.c_str());
    efficiency = static_cast<TEfficiency*>(file->Get(effname.c_str()));
    func = static_cast<TF2*>(file->Get(funcname.c_str()));
    // NOTE(review): the stored object is reinterpreted as a TFitResultPtr, which assumes
    // it was written to the file as a TFitResultPtr - confirm against the producer.
    resultPtr = reinterpret_cast<TFitResultPtr*>(file->Get(resultKey.c_str()));
}

std::vector<float> EffLoader_2Dfunc::eval(float xval, float yval) {
// Get nominal value of the efficiency at evaluated point
effval = func->Eval(xval,yval);
// Now get the uncertainty at that point
double ci[1];
double points[] = {xval,yval};
int stride1 = 2;
int stride2 = 1;
// have to get the underlying TFitResult from the TFitResultPtr before accessing GetConfidenceIntervals(), otherwise compiler breaks
TFitResult* result = resultPtr->Get();
result->GetConfidenceIntervals(2, stride1, stride2, points, ci, 0.683, false);
// Now get the up and down variations on the uncertainty
effup = effval + ci[0];
effdown = effval - ci[0];
return {effval,effup,effdown};
}

26 changes: 25 additions & 1 deletion TIMBER/Tools/Common.py
Original file line number Diff line number Diff line change
@@ -646,4 +646,28 @@ def GenerateHash(length=8):
'''
return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for i in range(length))

## @}
def ProgressBar(it, prefix="", size=60, out=sys.stdout):
    '''Generate a progress bar from any iterable. Taken from: https://stackoverflow.com/a/34482761
    The bar is redrawn (using a carriage return) before the first item and after
    each item has been processed by the caller; a newline is written at the end.

    Usage:
        for i in ProgressBar(it):
            # do something

    @param it (iterable): Any iterable (dict, list, etc) with which to generate the amount of elements in the bar.
        Iterables without a length (e.g. generators) are first collected into a list so the total is known.
    @param prefix (str, optional): Prefix string to prepend to progress bar
    @param size (int): Length of progress bar in characters
    @param out (ostream): Output stream, i.e. file, stdout, stderr, etc

    Yields:
        The items of `it`, unchanged and in order.
    '''
    try:
        count = len(it)
    except TypeError:
        # Unsized iterable: materialize it to learn the total up front.
        it = list(it)
        count = len(it)

    def show(j):
        # With nothing to iterate over, draw a full bar instead of dividing by zero.
        x = int(size*j/count) if count else size
        out.write("%s[%s%s] %i/%i\r" % (prefix, u"#"*x, "."*(size-x), j, count))
        out.flush()

    show(0)
    for done, item in enumerate(it, 1):
        yield item # hand the actual item (e.g. filename) to the caller, then resume here
        show(done)
    out.write("\n")
    out.flush()

## @}
19 changes: 12 additions & 7 deletions TIMBER/Tools/Plot.py
Original file line number Diff line number Diff line change
@@ -112,7 +112,7 @@ def CompareShapes(outfilename,year,prettyvarname,bkgs={},signals={},names={},col
# If bkg, set fill color and add to stack
if pname in bkgs.keys():
h.SetFillColorAlpha(colors[pname],0.2 if not stackBkg else 1)
h.SetLineWidth(0)
h.SetLineWidth(0)
if stackBkg: bkgStack.Add(h)
if colors[pname] not in colors_in_legend:
legend.AddEntry(h,leg_name,'f')
@@ -163,21 +163,25 @@ def CompareShapes(outfilename,year,prettyvarname,bkgs={},signals={},names={},col

if len(bkgs.keys()) > 0:
if stackBkg:
# First, draw background THStack and do axis labels
bkgStack.Draw('hist')
bkgStack.GetXaxis().SetTitleOffset(1.1)
_doAxisTitles(bkgStack,split=doSoverB)
# Now, create transparent histogram with black edges to go over the total
total = bkgStack.GetHists().First().Clone()
total.Reset()
for stack_hist in bkgStack.GetHists():
total.Add(stack_hist)
total.SetLineColorAlpha(ROOT.kBlack,1)
total.SetLineWidth(1)
total.SetFillColorAlpha(ROOT.kBlack,0)
# Re-draw the background THStack
bkgStack.Draw('hist')
total.Draw('histsame')
# Draw the transparent histogram to give a black edge over final result
total.Draw('histsame')
else:
for bkg in bkgs.values():
bkgStack.GetXaxis().SetTitleOffset(1.1)
bkg.GetXaxis().SetTitleOffset(1.1)
_doAxisTitles(bkg,split=doSoverB)
bkg.Draw('same hist')
for h in signals.values():
@@ -228,6 +232,7 @@ def CompareShapes(outfilename,year,prettyvarname,bkgs={},signals={},names={},col
CMS_lumi.CMS_lumi(c, iPeriod=year, sim=True)

c.Print(outfilename,outfilename.split('.')[-1])
c.Close() # close canvas to prevent segfault

def MakeSoverB(stack_of_bkgs,signal,forceForward=False,forceBackward=False):
'''Makes the SoverB distribution and returns it.
@@ -590,14 +595,14 @@ def EasyPlots(name, histlist, bkglist=[],signals=[],colors=[],titles=[],logy=Fal

# Do the signals
if len(signals) > 0:
signals[hist_index].SetLineColor(kBlue)
signals[hist_index].SetLineColor(ROOT.kBlue)
signals[hist_index].SetLineWidth(2)
if logy == True:
signals[hist_index].SetMinimum(1e-3)
legends[hist_index].AddEntry(signals[hist_index],signals[hist_index].GetName().split('_')[0],'L')
signals[hist_index].Draw('hist same')

tot_hists[hist_index].SetFillColor(kBlack)
tot_hists[hist_index].SetFillColor(ROOT.kBlack)
tot_hists[hist_index].SetFillStyle(3354)

tot_hists[hist_index].Draw('e2 same')
@@ -665,9 +670,9 @@ def MakePullPlot( data,bkg):
ibkg_err = abs(bkg_down.GetBinContent(ibin)-bkg.GetBinContent(ibin))

if idata_err != None: # deal with case when there's no data error (ie. bin content = 0)
sigma = sqrt(idata_err*idata_err + ibkg_err*ibkg_err)
sigma = math.sqrt(idata_err*idata_err + ibkg_err*ibkg_err)
else:
sigma = sqrt(ibkg_err*ibkg_err)
sigma = math.sqrt(ibkg_err*ibkg_err)

if sigma != 0 :
ipull = (pull.GetBinContent(ibin))/sigma
2 changes: 1 addition & 1 deletion setup.sh
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ fi

if [ ! -d "bin/libarchive" ]
then
git clone https://github.com/libarchive/libarchive.git
git clone -b v3.6.2 https://github.com/libarchive/libarchive.git
cd libarchive
cmake . -DCMAKE_INSTALL_PREFIX=../bin/libarchive
make