lcorcodilos · Jun 15, 2022 · Jun 20, 2022 · Jun 21, 2022 · Jun 21, 2022 · Jun 21, 2022
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ you're obviously free to use your favorite tool for the job (you can install vir
 ```
 python -m virtualenv timber-env
 source timber-env/bin/activate
-git clone https://github.com/lcorcodilos/TIMBER.git
+git clone https://github.com/ammitra/TIMBER.git
 cd TIMBER
 source setup.sh
 ```
@@ -78,4 +78,4 @@ which access scale factors, calculate pileup weights, and more. These are all wr
 in C++ for use in `Cut` and `Define` arguments and are provided so that users have a common tool box to share. 
 Additionally, the AnalysisModules folder welcomes additions of custom C++ modules on a 
 per-analysis basis so that the code can be properly archived for future reference and for sharing
-with other analyzers.
+with other analyzers.
diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py
@@ -5,7 +5,7 @@
 """
 
 from TIMBER.CollectionOrganizer import CollectionOrganizer
-from TIMBER.Tools.Common import GenerateHash, GetHistBinningTuple, CompileCpp, ConcatCols, GetStandardFlags, ExecuteCmd, LoadColumnNames
+from TIMBER.Tools.Common import GenerateHash, GetHistBinningTuple, CompileCpp, ConcatCols, GetStandardFlags, ExecuteCmd, LoadColumnNames, ProgressBar
 from clang import cindex
 from collections import OrderedDict
 
@@ -30,7 +30,7 @@ class analyzer(object):
 
     When using class functions to perform actions, an active node will always be tracked so that the next action uses 
     the active node and assigns the output node as the new #ActiveNode"""
-    def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampleStr=''):
+    def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampleStr='',skipEmpty=True):
         """Constructor.
         
         Sets up the tracking of actions on an RDataFrame as nodes. Also
@@ -46,6 +46,9 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
         @param multiSampleStr (str, optional): If a sample was generated with multiple mass points,
                 define the mass which you'd like to analyze in this string. If you're unsure of your options, check the Runs TTree
                 for a branch `genEventSumw_YMass_<mass>`. Defaults to '' which will load `genEventSumw_`.
+        @param skipEmpty (bool, optional): If True, ROOT files whose Events TTree is empty are skipped instead of
+                being added to the analyzer; if False, they are added anyway. A warning naming the file is printed
+                in either case. Defaults to True.
         """
 
         ## @var fileName
@@ -97,6 +100,7 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
         self._eventsTreeName = eventsTreeName
         self._runTreeName = runTreeName
         self.silent = False
+        self.skipEmpty = skipEmpty  # read by _addFile to decide whether files with an empty Events tree are chained
         if multiSampleStr != '':
             multiSampleStr = 'YMass_%s'%multiSampleStr
         genEventSumw_str = 'genEventSumw_'+multiSampleStr
@@ -105,13 +109,18 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
         # Setup TChains for multiple or single file
         self._eventsChain = ROOT.TChain(self._eventsTreeName) 
         self.RunChain = ROOT.TChain(runTreeName) 
-        print ('Opening files...')
-        if isinstance(self.fileName,list):
-            for f in self.fileName:
-                self._addFile(f)
+        if isinstance(self.fileName,list):  # list of .root file names
+            for f in ProgressBar(self.fileName, "Opening files: "):
+                self._addFile(f)
         else:
-            self._addFile(self.fileName)
-
+            if not self.fileName.endswith(".txt"):  # single file, handed to _addFile as is
+                print("Opening file...")
+                self._addFile(self.fileName)
+            else:  # .txt file containing line-separated .root filenames
+                fNames = self._parseTxt(self.fileName)
+                for f in ProgressBar(fNames, "Opening files: "):
+                    self._addFile(f)
+
         # Make base RDataFrame
         BaseDataFrame = ROOT.RDataFrame(self._eventsChain) 
         self.BaseNode = Node('base',BaseDataFrame) 
@@ -170,7 +179,14 @@ def __init__(self,fileName,eventsTreeName="Events",runTreeName="Runs",multiSampl
         for f in glob.glob(os.environ["TIMBERPATH"]+'TIMBER/Framework/include/*.h'):
             if f.split('/')[-1] in skipHeaders: continue
             CompileCpp('#include "%s"\n'%f)
-
+
+    def _parseTxt(self,f):
+        '''Parse .txt file and return the list of its non-blank lines, stripped of surrounding whitespace.
+        @param f (str): .txt filename
+        '''
+        with open(f,"r") as txt_file:  # closed on exit, also if reading fails
+            return [line.strip() for line in txt_file if line.strip()]
+
     def _addFile(self,f):
         '''Add file to TChains being tracked.
 
@@ -180,10 +196,20 @@ def _addFile(self,f):
         if f.endswith(".root"): 
             if 'root://' not in f and f.startswith('/store/'):
                 f='root://cms-xrd-global.cern.ch/'+f
-            self._eventsChain.Add(f)
             if ROOT.TFile.Open(f,'READ') == None:
                 raise ReferenceError('File %s does not exist'%f)
             tempF = ROOT.TFile.Open(f,'READ')
+            # Chain the file only if it holds the events tree; an empty tree is chained only when skipEmpty is False
+            treeNames = [key.GetName() for key in tempF.GetListOfKeys()]
+            if self._eventsTreeName not in treeNames:
+                print('WARNING: The following file does NOT contain an Events TTree, skipping.\n\tFile: {}'.format(f))
+            elif tempF.Get(self._eventsTreeName).GetEntries() != 0:  # entry count, no entry is read
+                self._eventsChain.Add(f)
+            elif self.skipEmpty:
+                print("WARNING: The following file contains an empty Events TTree, skipping. If you wish to add regardless, please call the analyzer with 'skipEmpty=False'\n\tFile: {}".format(f))
+            else:
+                print("WARNING: The following file contains an empty Events TTree, adding to analyzer regardless. If you wish to skip, please call analyzer with 'skipEmpty=True' (default).\n\tFile: {}".format(f))
+                self._eventsChain.Add(f)
             if tempF.Get(self._runTreeName) != None:
                 self.RunChain.Add(f)
             tempF.Close()
@@ -388,8 +419,6 @@ def FilterColumnNames(self,columns,node=None):
         out = []
         for i in columns:
             if i in cols_in_node: out.append(i)
-            else: print ("WARNING: Column %s not found and will be dropped."%i)
-
         return out
 
     def GetTriggerString(self,trigList):
@@ -632,22 +661,29 @@ def ReorderCollection(self, name, basecoll, newOrderCol, skip=[]):
         '''
         return self.SubCollection(name, basecoll, newOrderCol, skip)
 
-    def ObjectFromCollection(self,name,basecoll,index,skip=[]):
+    def ObjectFromCollection(self,name,basecoll,index,skip=[],strict=True):
         '''Similar to creating a SubCollection except the newly defined columns
         are single values (not vectors/arrays) for the object at the provided index.
         
         @param name (str): Name of new collection.
         @param basecoll (str): Name of derivative collection.
         @param index (str): Index of the collection item to extract.
         @param skip ([str]): List of variable names in the collection to skip.
+        @param strict (bool): Whether or not to require strict definitions. I.e., if
+            trying to derive a new collection from "Jet" base collection, then strict
+            definitions would ensure only the "Jet" collections are renamed, not any
+            column including the word "Jet".
 
         Returns:
             None. New nodes created with the sub collection.
 
         Example:
             ObjectFromCollection('LeadJet','FatJet','0')
         '''
-        collBranches = [str(cname) for cname in self.DataFrame.GetColumnNames() if ( (basecoll in str(cname)) and (str(cname) not in skip))]
+        if not strict:
+            collBranches = [str(cname) for cname in self.DataFrame.GetColumnNames() if ( (basecoll in str(cname)) and (str(cname) not in skip))]
+        else:
+            collBranches = [str(cname) for cname in self.DataFrame.GetColumnNames() if ( (basecoll == str(cname)[:len(basecoll)]) and (str(cname) not in skip))]
         for b in collBranches:
             replacementName = b.replace(basecoll,name)
             if b == 'n'+basecoll:

diff --git a/TIMBER/Framework/TopPhi_modules/BranchCorrection.cc b/TIMBER/Framework/TopPhi_modules/BranchCorrection.cc
@@ -0,0 +1,33 @@
+#include <ROOT/RVec.hxx>
+/**
+ * @class BranchCorrection
+ * @brief Trivial class to load a branch as correction in TIMBER:
+ *        each method repackages its float arguments into an RVec<float>.
+ * Taken from https://github.com/mroguljic/TIMBER/blob/Zbb_branch_py3/TIMBER/Framework/Zbb_modules/BranchCorrection.cc
+ */
+using namespace ROOT::VecOps;
+class BranchCorrection {
+
+    public:
+        BranchCorrection(){};
+        ~BranchCorrection(){};
+        RVec<float> evalCorrection(float val);
+        RVec<float> evalWeight(float val,float valUp,float valDown);
+        RVec<float> evalUncert(float valUp,float valDown);
+
+};
+
+// Wrap a single value as the one-element vector {val}.
+RVec<float> BranchCorrection::evalCorrection(float val){
+    return RVec<float>{val};
+}
+
+// Pack a value and its up/down variations as {val, valUp, valDown}.
+RVec<float> BranchCorrection::evalWeight(float val,float valUp,float valDown){
+    return RVec<float>{val,valUp,valDown};
+}
+
+// Pack the up/down variations as {valUp, valDown}.
+RVec<float> BranchCorrection::evalUncert(float valUp,float valDown){
+    return RVec<float>{valUp,valDown};
+}
diff --git a/TIMBER/Framework/include/EffLoader_2Dfunc.h b/TIMBER/Framework/include/EffLoader_2Dfunc.h
@@ -0,0 +1,35 @@
+#ifndef _TIMBER_EFFLOADER_2DFUNC
+#define _TIMBER_EFFLOADER_2DFUNC
+#include <string>
+#include <vector>
+#include "TFile.h"
+#include "TEfficiency.h"
+#include "TF2.h"
+#include "TFitResultPtr.h"
+#include "TFitResult.h"
+
+/**
+ * Class for handling efficiencies whose uncertainties are being evaluated via a fitted function.
+ * A ROOT file containing the TEfficiency, TH2, TF2, and TFitResultPtr from the fit of the TF2 to
+ * the TEfficiency must be passed to the class constructor.
+ *     NOTE: fitting a 2D TEfficiency only works in ROOT versions >6.28, so the fitting may have
+ *     to be done outside of CMSSW and the TF2 and TFitResultPtr saved to the file externally.
+ */
+
+class EffLoader_2Dfunc {
+    private:
+        TFile *file = nullptr;               // input file opened by the loading constructor
+        TEfficiency *efficiency = nullptr;   // object stored in the file under effname
+        TF2 *func = nullptr;                 // fitted function stored under funcname; evaluated by eval()
+        TFitResultPtr *resultPtr = nullptr;  // fit result stored as "TBinomialEfficiencyFitter_result_of_<funcname>"
+        float effval = 0.f;                  // nominal value from the last eval() call
+        float effup = 0.f;                   // up variation from the last eval() call
+        float effdown = 0.f;                 // down variation from the last eval() call
+
+    public:
+        EffLoader_2Dfunc();
+        EffLoader_2Dfunc(std::string filename, std::string funcname, std::string effname);
+        std::vector<float> eval(float xval, float yval);
+};
+
+#endif
diff --git a/TIMBER/Framework/src/EffLoader_2Dfunc.cc b/TIMBER/Framework/src/EffLoader_2Dfunc.cc
@@ -0,0 +1,53 @@
+#include "../include/EffLoader_2Dfunc.h"
+#include <stdexcept>
+
+// Default constructor: nothing is loaded, so eval() throws until an object built with the loading constructor is used.
+EffLoader_2Dfunc::EffLoader_2Dfunc() : file(nullptr), efficiency(nullptr), func(nullptr), resultPtr(nullptr), effval(0), effup(0), effdown(0) {}
+
+/**
+ * Open `filename` and fetch from it the efficiency `effname`, the fitted function `funcname` and the
+ * fit result stored under the key "TBinomialEfficiencyFitter_result_of_<funcname>".
+ * @throws std::runtime_error if the file cannot be opened or the function or fit result is missing.
+ */
+EffLoader_2Dfunc::EffLoader_2Dfunc(std::string filename, std::string funcname, std::string effname) {
+    file = TFile::Open(filename.c_str());
+    if (file == nullptr || file->IsZombie()) {
+        throw std::runtime_error("EffLoader_2Dfunc: could not open file " + filename);
+    }
+    efficiency = (TEfficiency*)file->Get(effname.c_str());
+    func = (TF2*)file->Get(funcname.c_str());
+    // NOTE(review): the stored object is cast straight to TFitResultPtr*, unchanged from the original code - confirm it matches what was written to the file
+    resultPtr = (TFitResultPtr*)file->Get(("TBinomialEfficiencyFitter_result_of_"+funcname).c_str());
+    if (func == nullptr || resultPtr == nullptr) {
+        throw std::runtime_error("EffLoader_2Dfunc: function or fit result for " + funcname + " not found in " + filename);
+    }
+}
+
+/**
+ * Evaluate the fitted function at (xval, yval).
+ * @return {nominal, nominal + ci, nominal - ci}, where ci is the fit's confidence interval at that point for a confidence level of 0.683.
+ * @throws std::runtime_error if no function or fit result has been loaded.
+ */
+std::vector<float> EffLoader_2Dfunc::eval(float xval, float yval) {
+    if (func == nullptr || resultPtr == nullptr) {
+        throw std::runtime_error("EffLoader_2Dfunc::eval: no function or fit result loaded");
+    }
+    // Get nominal value of the efficiency at evaluated point
+    effval = func->Eval(xval,yval);
+    // Now get the uncertainty at that point: ONE two-dimensional point laid out as (x, y)
+    double ci[1] = {0.};
+    double points[] = {xval,yval};
+    int stride1 = 2;    // offset between consecutive points
+    int stride2 = 1;    // offset between the coordinates of one point
+    // have to get the underlying TFitResult from the TFitResultPtr before accessing GetConfidenceIntervals(), otherwise compiler breaks
+    TFitResult* result = resultPtr->Get();
+    if (result == nullptr) {
+        throw std::runtime_error("EffLoader_2Dfunc::eval: fit result is empty");
+    }
+    // First argument is the number of points (1), not the dimension: asking for 2 would read points[2..3] and write ci[1]
+    result->GetConfidenceIntervals(1, stride1, stride2, points, ci, 0.683, false);
+    // Now get the up and down variations on the uncertainty
+    effup = effval + ci[0];
+    effdown = effval - ci[0];
+    return {effval,effup,effdown};
+}
diff --git a/TIMBER/Tools/Common.py b/TIMBER/Tools/Common.py
@@ -646,4 +646,29 @@ def GenerateHash(length=8):
     '''
     return ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for i in range(length))
 
-## @}
+def ProgressBar(it, prefix="", size=60, out=sys.stdout):
+    '''Generate a progress bar from any iterable of a given size. Taken from: https://stackoverflow.com/a/34482761
+    Usage:
+
+    for i in ProgressBar(it):
+        # do something
+
+    @param it (iterable): Any iterable supporting len() (dict, list, etc) with which to generate the amount of elements in the bar
+    @param prefix (str, optional): Prefix string to prepend to progress bar
+    @param size (int): Length of progress bar in characters
+    @param out (ostream): Output stream, i.e. file, stdout, stderr, etc
+    '''
+    count = len(it)
+    def show(j):
+        # An empty iterable is drawn as a full bar instead of dividing by zero
+        x = int(size*j/count) if count else size
+        out.write("%s[%s%s] %i/%i\r" % (prefix, u"#"*x, "."*(size-x), j, count))
+        out.flush()
+    show(0)
+    for i, item in enumerate(it):
+        yield item  # hand the actual item (e.g. filename) to the caller; the bar is updated when iteration resumes
+        show(i+1)
+    out.write("\n")
+    out.flush()
+
+## @}
diff --git a/TIMBER/Tools/Plot.py b/TIMBER/Tools/Plot.py
@@ -112,7 +112,7 @@ def CompareShapes(outfilename,year,prettyvarname,bkgs={},signals={},names={},col
         # If bkg, set fill color and add to stack
         if pname in bkgs.keys():
             h.SetFillColorAlpha(colors[pname],0.2 if not stackBkg else 1)
-            h.SetLineWidth(0) 
+            h.SetLineWidth(0)
             if stackBkg: bkgStack.Add(h)
             if colors[pname] not in colors_in_legend:
                 legend.AddEntry(h,leg_name,'f')
@@ -163,21 +163,25 @@ def CompareShapes(outfilename,year,prettyvarname,bkgs={},signals={},names={},col
 
     if len(bkgs.keys()) > 0:
         if stackBkg:
+            # First, draw background THStack and do axis labels
             bkgStack.Draw('hist')
             bkgStack.GetXaxis().SetTitleOffset(1.1)
             _doAxisTitles(bkgStack,split=doSoverB)
+            # Now, create transparent histogram with black edges to go over the total
             total = bkgStack.GetHists().First().Clone()
             total.Reset()
             for stack_hist in bkgStack.GetHists():
                 total.Add(stack_hist)
             total.SetLineColorAlpha(ROOT.kBlack,1)
             total.SetLineWidth(1)
             total.SetFillColorAlpha(ROOT.kBlack,0)
+            # Re-draw the background THStack
             bkgStack.Draw('hist')
-            total.Draw('histsame')
+            # Draw the transparent histogram to give a black edge over final result
+            total.Draw('histsame') 
         else:
             for bkg in bkgs.values():
-                bkgStack.GetXaxis().SetTitleOffset(1.1)
+                bkg.GetXaxis().SetTitleOffset(1.1)
                 _doAxisTitles(bkg,split=doSoverB)
                 bkg.Draw('same hist')
     for h in signals.values():
@@ -228,6 +232,7 @@ def CompareShapes(outfilename,year,prettyvarname,bkgs={},signals={},names={},col
     CMS_lumi.CMS_lumi(c, iPeriod=year, sim=True)
 
     c.Print(outfilename,outfilename.split('.')[-1])
+    c.Close()	# close canvas to prevent segfault
 
 def MakeSoverB(stack_of_bkgs,signal,forceForward=False,forceBackward=False):
     '''Makes the SoverB distribution and returns it.
@@ -590,14 +595,14 @@ def EasyPlots(name, histlist, bkglist=[],signals=[],colors=[],titles=[],logy=Fal
 
                 # Do the signals
                 if len(signals) > 0: 
-                    signals[hist_index].SetLineColor(kBlue)
+                    signals[hist_index].SetLineColor(ROOT.kBlue)
                     signals[hist_index].SetLineWidth(2)
                     if logy == True:
                         signals[hist_index].SetMinimum(1e-3)
                     legends[hist_index].AddEntry(signals[hist_index],signals[hist_index].GetName().split('_')[0],'L')
                     signals[hist_index].Draw('hist same')
 
-                tot_hists[hist_index].SetFillColor(kBlack)
+                tot_hists[hist_index].SetFillColor(ROOT.kBlack)
                 tot_hists[hist_index].SetFillStyle(3354)
 
                 tot_hists[hist_index].Draw('e2 same')
@@ -665,9 +670,9 @@ def MakePullPlot( data,bkg):
             ibkg_err = abs(bkg_down.GetBinContent(ibin)-bkg.GetBinContent(ibin))
 
         if idata_err != None: # deal with case when there's no data error (ie. bin content = 0)
-            sigma = sqrt(idata_err*idata_err + ibkg_err*ibkg_err)
+            sigma = math.sqrt(idata_err*idata_err + ibkg_err*ibkg_err)
         else:
-            sigma = sqrt(ibkg_err*ibkg_err)
+            sigma = math.sqrt(ibkg_err*ibkg_err)
 
         if sigma != 0 :
             ipull = (pull.GetBinContent(ibin))/sigma

diff --git a/setup.sh b/setup.sh
@@ -29,7 +29,7 @@ fi
 
 if [ ! -d "bin/libarchive" ] 
 then
-  git clone https://github.com/libarchive/libarchive.git
+  git clone -b v3.6.2 https://github.com/libarchive/libarchive.git
   cd libarchive
   cmake . -DCMAKE_INSTALL_PREFIX=../bin/libarchive
   make