ra4_macros  bede988c286599a3a84b77a4d788ac0a971e89f9
rpv_bkg_syst.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 """This script plots bkg systematics for the RPV analysis"""
3 import sys
4 import math
5 import ROOT
6 from array import array
7 import argparse
8 parser = argparse.ArgumentParser()
9 parser.add_argument("-i", "--input")
10 
11 args = parser.parse_args()
12 if (args.input):
13  infile = args.input
14 else:
15  sys.exit("Please provide an input root file")
16 
17 
18 verbose = True
19 one_pdf = False #put all plots in one pdf file
20 
21 # function to get pointers to histogram in root file
22 def get_hist_with_overflow(file,histname):
23  if verbose:
24  print" getting "+histname
25  hist = file.Get(histname)
26  nbinsX = hist.GetNbinsX()
27  content = hist.GetBinContent(nbinsX) + hist.GetBinContent(nbinsX+1)
28  error = math.sqrt(math.pow(hist.GetBinError(nbinsX),2) + math.pow(hist.GetBinError(nbinsX+1),2))
29  hist.SetBinContent(nbinsX,content)
30  hist.SetBinContent(nbinsX+1,0) #avoid adding overflow N times for N calls to function
31  hist.SetBinError(nbinsX+1,0)
32  hist.SetBinError(nbinsX,error)
33  return hist
34 
35 #This function calculates symmetrized relative errors for a single variation in a single kinematic bin
36 #First, find new floating normalization, both for the variation up and down
37 #Then, using all processes, find the difference from nominal for up and down
38 #Return a histogram binned in Nb with the mean of the absolute value of relative errors up and down
39 def get_symmetrized_relative_errors(sysName,tot_data,total_nominal,procList,floating_process,sysFile,directory):
40 
41 
42  # total hists for each variation, to include all processes
43  systHistUp = ROOT.TH1F(directory+"_"+sysName+"_u","",5,0,5)
44  systHistDown = ROOT.TH1F(directory+"_"+sysName+"_d","",5,0,5)
45 
46  #list of hists, one for each process
47  histsUp = []
48  histsDown = []
49  totUp=tot_data
50  totDown=tot_data
51  floating = -999.
52  #load hists and calculate SFs for floating component for each variation
53  for ip,proc in enumerate(procList):
54  fixed = (ip != floating_process) # identify if this process has fixed or floating normalization
55 
56  up = get_hist_with_overflow(sysFile,directory + "/" + proc + "_" + sysName + "Up")
57  down = get_hist_with_overflow(sysFile,directory + "/" + proc + "_" + sysName + "Down")
58 
59  histsUp.append(up)
60  histsDown.append(down)
61  print "Up fixed "+str(fixed)+", yield "+str(up.Integral())
62  print "Down fixed "+str(fixed)+", yield "+str(down.Integral())
63  #subtract all FIXED components from total observed yield
64  if fixed:
65  totUp -= up.Integral()
66  totDown -= down.Integral()
67 
68  # keep track of FLOATING yield
69  else:
70  floatingUp = up.Integral()
71  floatingDown = down.Integral()
72 
73  print "floatingDown = "+str(floatingDown)
74 
75  #scale factor for floating is just the leftover data, after fixed subtraction, divided by floating yield
76  sfUp = totUp/floatingUp
77  sfDown = totDown/floatingDown
78  if tot_data ==0:
79  sfUp= 1.
80  sfDown=1.
81  histsUp[floating_process].Scale(sfUp)
82  histsDown[floating_process].Scale(sfDown)
83 
84 
85  if verbose:
86  print "variation UP SF, "+directory+", floating process "+str(floating_process)+", "+str(sfUp)
87  print "variation DOWN SF, "+directory+", floating process "+str(floating_process)+", "+str(sfDown)
88  runningtotalu=0
89  runningtotald=0
90  for ip,proc in enumerate(procList):
91  print "Process "+proc+": "+str(histsNom[ip].Integral())
92  runningtotalu+=histsUp[ip].Integral()
93  runningtotald+=histsDown[ip].Integral()
94 
95  print "UP MC total: "+str(runningtotalu)
96  print "DOWN MC total: "+str(runningtotald)
97  print "Data: "+str(tot_data)
98 
99 
100 
101  #Get total histogram that includes all processes
102  for ip,proc in enumerate(procList):
103  systHistUp.Add(histsUp[ip])
104  systHistDown.Add(histsDown[ip])
105 
106  #Subtract nominal histogram from varied histograms
107  systHistUp.Add(total_nominal,-1)
108  systHistDown.Add(total_nominal,-1)
109 
110  for i in range(1,systHistUp.GetNbinsX()+1):
111  #Find absolute value of deviation
112  systHistUp.SetBinContent(i,abs(systHistUp.GetBinContent(i)))
113  systHistDown.SetBinContent(i,abs(systHistDown.GetBinContent(i)))
114 
115 
116  #Fill histUp with symmetrized error by adding histDown and dividing by 2, then divide by nominal to get relative symmetrized error
117  systHistUp.Add(systHistDown)
118  systHistUp.Scale(0.5)
119  systHistUp.Divide(total_nominal) # now systHistUp contains our relative errors
120  return systHistUp
121 
122 
123 def set_palette_gray(ncontours=20):
124  #stops = [0.00, 0.25, 0.50, 0.75, 1.00]
125  stops = [0.00, 0.10, 0.50, 0.90, 1.00]
126  red = [1.00, 0.80, 0.65, 0.50, 0.34]
127  green = [1.00, 0.80, 0.65, 0.50, 0.34]
128  blue = [1.00, 0.80, 0.65, 0.50, 0.34]
129  s = array('d', stops)
130  r = array('d', red)
131  g = array('d', green)
132  b = array('d', blue)
133  npoints = len(s)
134  fi = ROOT.TColor.CreateGradientColorTable(npoints, s, r, g, b, ncontours)
135  ROOT.gStyle.SetNumberContours(ncontours)
136 
137 
138 
139 
140 #Main body of code
141 
142 ROOT.gROOT.SetBatch(ROOT.kTRUE) #prevent th1->Draw() from trying to open X11 window
143 ROOT.gStyle.SetCanvasDefW(600)
144 ROOT.gStyle.SetCanvasDefH(600)
145 ROOT.gStyle.SetTitleOffset(1.2,"x")
146 #ROOT.gStyle.SetTitleOffset(1.7,"y")
147 ROOT.gStyle.SetTitleOffset(1.7,"z")
148 #ROOT.gStyle.SetPadRightMargin(0.19)
149 ROOT.gStyle.SetPadLeftMargin(0.12)
150 ROOT.gStyle.SetPadBottomMargin(0.12)
151 ROOT.gStyle.SetPadTopMargin(0.08)
152 ROOT.gStyle.SetPaintTextFormat("6.1f");
153 
154 ROOT.gStyle.SetLabelFont(42)
155 ROOT.gStyle.SetLabelSize(0.05)
156 ROOT.gStyle.SetTitleFont(42)
157 ROOT.gStyle.SetTitleSize(0.07)
158 
160 
161 #list of processes
162 procList=["qcd","ttbar","wjets","other"]
163 
164 #make list of systematics- name, title, plot color and line style
165 #WARNING The phrases 'pdf' and 'mu' trigger special handling later on
166 
167 systList=[]
168 ## for i in range(0,100):
169 ## if i == 26 or i == 46: continue
170 ## systList.append(["w_pdf"+str(i),"PDF "+str(i),i,1])
171 systList.append(["btag_bc","b,c jet b-tag SF",2,1])
172 systList.append(["btag_udsg","u,d,s,g jet b-tag SF",3,1])
173 systList.append(["jes","Jet energy scale",4,1])
174 systList.append(["jer","Jet energy resolution",5,1])
175 systList.append(["lep_eff","Lepton efficiency",6,1])
176 systList.append(["pileup","Pileup",7,1])
177 #systList.append(["gs","Gluon splitting",9,1])
178 systList.append(["qcd_flavor","QCD flavor",8,1])
179 systList.append(["gs45","Gluon splitting (N_{jet}=4,5)",9,1])
180 systList.append(["gs67","Gluon splitting (N_{jet}=6,7)",10,1])
181 systList.append(["gs89","Gluon splitting (N_{jet}=8,9)",11,1])
182 systList.append(["gs10Inf","Gluon splitting (N_{jet}#geq10)",12,1])
183 systList.append(["ttbar_pt","Top quark p_{T}",13,1])
184 systList.append(["mur","Renormalization scale",14,1])
185 systList.append(["muf","Factorization scale",15,1])
186 systList.append(["murf","Renorm. and fact. scale",16,1])
187 systList.append(["pdf","PDF",17,1])
188 systList.append(["mc_stat","MC statistics",1,2])
189 
190 nSyst = len(systList)
191 #make list of bins
192 
193 binList = []
194 binList.append(["bin0","4 #leq n_{jets} #leq 5","500 #leq M_{J} < 800 GeV","n_{lep} = 0"])
195 binList.append(["bin1","6 #leq n_{jets} #leq 7","500 #leq M_{J} < 800 GeV","n_{lep} = 0"])
196 binList.append(["bin2","4 #leq n_{jets} #leq 5","500 #leq M_{J} < 800 GeV","n_{lep} = 1"])
197 binList.append(["bin3","4 #leq n_{jets} #leq 5","M_{J} #geq 800 GeV","n_{lep} = 0"])
198 binList.append(["bin4","6 #leq n_{jets} #leq 7","M_{J} #geq 800 GeV","n_{lep} = 0"])
199 binList.append(["bin5","4 #leq n_{jets} #leq 5","M_{J} #geq 800 GeV","n_{lep} = 1"])
200 #
201 #binList.append(["bin6","4 #leq n_{jets} #leq 5","300 #leq M_{J} < 500 GeV","n_{lep} = 0"])
202 #binList.append(["bin7","6 #leq n_{jets} #leq 7","300 #leq M_{J} < 500 GeV","n_{lep} = 0"])
203 #binList.append(["bin8","8 #leq n_{jets} #leq 9","300 #leq M_{J} < 500 GeV","n_{lep} = 0"])
204 #binList.append(["bin9","n_{jets} #geq 10","300 #leq M_{J} < 500 GeV","n_{lep} = 0"])
205 # signal regions
206 binList.append(["bin10","n_{jets} #geq 10","500 #leq M_{J} < 800 GeV","n_{lep} = 0"])
207 binList.append(["bin11","6 #leq n_{jets} #leq 7","500 #leq M_{J} < 800 GeV","n_{lep} = 1"])
208 binList.append(["bin12","n_{jets} #geq 8","500 #leq M_{J} < 800 GeV","n_{lep} = 1"])
209 binList.append(["bin13","n_{jets} #geq 10","M_{J} #geq 800 GeV","n_{lep} = 0"])
210 binList.append(["bin14","6 #leq n_{jets} #leq 7","M_{J} #geq 800 GeV","n_{lep} = 1"])
211 binList.append(["bin15","n_{jets} #geq 8","M_{J} #geq 800 GeV","n_{lep} = 1"])
212 binList.append(["bin16","8 #leq n_{jets} #leq 9","500 #leq M_{J} < 800 GeV","n_{lep} = 0"])
213 binList.append(["bin17","8 #leq n_{jets} #leq 9","M_{J} #geq 800 GeV","n_{lep} = 0"])
214 
215 
216 sysFile = ROOT.TFile(infile,"read")
217 for ibin in binList:
218  directory = ibin[0]
219  if verbose:
220  print "directory is "+directory
221 
222  #get data observations for this bin in order to find normalization for floating process
223  data = get_hist_with_overflow(sysFile,directory + "/data_obs")
224  tot_data = data.Integral()
225  nbinsX = data.GetNbinsX()
226 
227  histsNom = []
228  tot = tot_data
229  floating = -999.
230 
231  #Define process index to indicate whether QCD or ttbar is floating in this bin
232  floating_process = 0
233  if "n_{lep} = 1" in ibin[3] : # comment out these two lines when testing only one process
234  floating_process = 1
235 
236 
237  #Make list of nominal hists for this bin (one entry per process) in order to calculate deviations later
238  for ip,proc in enumerate(procList):
239  fixed = (ip != floating_process)
240 
241  nominal = get_hist_with_overflow(sysFile,directory + "/" + proc)
242 
243  histsNom.append(nominal)
244  if fixed:
245  tot -= nominal.Integral()
246  else:
247  floating = nominal.Integral()
248 
249  #nominal floating normalization scale factor
250  sf = tot/floating
251  if tot_data == 0:
252  sf = 1.
253  #Apply normalization
254  histsNom[floating_process].Scale(sf)
255 
256  #Make master histogram of all nominal histograms (with floating normalization already corrected)
257  total_nominal = ROOT.TH1F(directory+"_nominal","",5,0,5)
258  total_nominal.Sumw2()
259  for ip,proc in enumerate(procList):
260  total_nominal.Add(histsNom[ip])
261 
262  if verbose:
263  print "Nominal SF, "+directory+", floating process "+str(floating_process)+", "+str(sf)
264  runningtotal=0
265  for ip,proc in enumerate(procList):
266  print "Process "+proc+": "+str(histsNom[ip].Integral())
267  runningtotal+=histsNom[ip].Integral()
268 
269  print "MC total: "+str(runningtotal)
270  print "Nominal hist total: " + str(total_nominal.Integral())
271  print "Data: "+str(tot_data)
272 
273 
274  #prepare cosmetics for plot, initialize table
275  ROOT.gStyle.SetPadRightMargin()
276  ROOT.gStyle.SetPadLeftMargin(0.12) #so it's not messed by larger table margin each iteration of the loop
277  c = ROOT.TCanvas()
278  leg = ROOT.TLegend(0.12,0.5,0.64,0.92)
279  table = ROOT.TH2F("table_"+directory,"",nbinsX,-0.5,nbinsX-0.5,nSyst,0,nSyst)
280  systHists_sym = []
281 
282  #loop over systs
283  for isys, syst in enumerate(systList,start=1):
284  sysName = syst[0]
285  if verbose:
286  print "starting "+sysName
287 
288  # this will eventually contain the errors to plot
289  systHist = ROOT.TH1F(directory+"_"+sysName+"_sym","",5,0,5)
290 
291  #stat uncertainty treated separately
292  if "mc_stat" not in sysName:
293  #pdf and mur,muf,murf treated separately
294  if "pdf" not in sysName and "mu" not in sysName:
295  #this function does everything
296  systHist = get_symmetrized_relative_errors(sysName,tot_data,total_nominal,procList,floating_process,sysFile,directory)
297  elif "mu" in sysName:
298  #In this case, there is a separate variation in sum_rescaled.root specific to each process
299  #We want to plot something to represent the total impact of varying mu for all the processes, so we just add them in quadrature
300  for pr in procList:
301  #calculate errors for each process (i.e. qcd_mur, wjets_mur ...)
302  thisproc = get_symmetrized_relative_errors(pr+"_"+sysName,tot_data,total_nominal,procList,floating_process,sysFile,directory)
303 
304  #square contents to add in quadrature
305  for i in range(1,thisproc.GetNbinsX()+1):
306  thisproc.SetBinContent(i,math.pow(thisproc.GetBinContent(i),2))
307 
308  systHist.Add(thisproc) #add
309 
310  #take square root again
311  for i in range(1,systHist.GetNbinsX()+1):
312  systHist.SetBinContent(i,math.pow(systHist.GetBinContent(i),0.5))
313 
314  elif "pdf" in sysName:
315  #This systematic is calculated from 100 variations that all represent a single source of uncertainty.
316  #We want to use information from all variations without artifically inflating the total uncertainty just by sampling the same effect many times.
317  #Therefore, we find symmetrized uncertainties for each pdf variation up/down, add them in quadrature and divide by sqrt(100) to normalize
318  for i in range(0,100):
319  #if i == 26 or i == 46: continue
320  #Get errors for this pdf variation
321  thisvar = get_symmetrized_relative_errors("w_pdf"+str(i),tot_data,total_nominal,procList,floating_process,sysFile,directory)
322  #Add in quadrature to running total
323  for i in range(1,systHist.GetNbinsX()+1):
324  thisvar.SetBinContent(i,math.pow(thisvar.GetBinContent(i),2))
325  systHist.Add(thisvar)
326 
327  #take square root and normalize by sqrt(100)
328  for i in range(1,systHist.GetNbinsX()+1):
329  systHist.SetBinContent(i,math.pow(systHist.GetBinContent(i),0.5)/10)
330 
331 
332  elif "mc_stat" in sysName:
333  systHist.Add(total_nominal)
334  for i in range(1,systHist.GetNbinsX()+1):
335  #in this case, we want our relative errors to just be the MC errors
336  if systHist.GetBinContent(i)>0:
337  systHist.SetBinContent(i,(systHist.GetBinError(i)/systHist.GetBinContent(i)))
338 
339 
340 
341  for i in range(1,systHist.GetNbinsX()+1):
342  if systHist.GetBinContent(i) < 0.001:
343  table.SetBinContent(i,isys,0.04)
344  else:
345  table.SetBinContent(i,isys,round(100*systHist.GetBinContent(i),1))
346  if verbose:
347  print "symmetrized rel error bin "+str(i)+" "+str(systHist.GetBinContent(i))
348 
349 
350  systHists_sym.append(systHist)
351 
352  table.GetYaxis().SetBinLabel(isys,syst[1])
353  systHists_sym[isys-1].SetTitle(";N_{b};Relative Error")
354  systHists_sym[isys-1].SetLineColor(syst[2])
355  systHists_sym[isys-1].SetLineStyle(syst[3])
356  systHists_sym[isys-1].SetLineWidth(2)
357  systHists_sym[isys-1].SetMaximum(1.)
358  systHists_sym[isys-1].SetMinimum(0.)
359  systHists_sym[isys-1].SetAxisRange(1,4,"X")
360  systHists_sym[isys-1].SetStats(0)
361  systHists_sym[isys-1].GetYaxis().SetTitleOffset(1.4)
362  systHists_sym[isys-1].GetYaxis().SetTitleSize(0.04)
363  systHists_sym[isys-1].GetXaxis().SetTitleSize(0.04)
364  systHists_sym[isys-1].GetXaxis().SetNdivisions(505)
365  leg.AddEntry(systHists_sym[isys-1],syst[1],"l")
366  systHists_sym[isys-1].Draw("hist same")
367 
368  leg.Draw()
369  tla = ROOT.TLatex()
370  tla.SetTextSize(0.038)
371  tla.DrawLatexNDC(0.12,0.93,"#font[62]{CMS} #scale[0.8]{#font[52]{Simulation}}")
372  tla.SetTextFont(42)
373  tla.DrawLatexNDC(0.71,0.93,"#sqrt{s} = 13 TeV")
374 # tla.SetTextSize(0.045)
375  tla.DrawLatexNDC(0.17, 0.45, ibin[3])
376  tla.DrawLatexNDC(0.17, 0.4, ibin[1])
377  tla.DrawLatexNDC(0.17, 0.35, ibin[2])
378  if one_pdf:
379  if directory == binList[0][0]:
380  outname = "plots/bkg_systs_all.pdf("
381  elif directory == binList[len(binList)-1][0]:
382  outname = "plots/bkg_systs_all.pdf)"
383  else:
384  outname = "plots/bkg_systs_all.pdf"
385 
386  else:
387  outname = "plots/bkg_systs_" + directory +".pdf"
388  print "outname is " +outname
389  c.Print(outname)
390 
391 
392  ROOT.gStyle.SetPadLeftMargin(0.35)
393  ROOT.gStyle.SetPadRightMargin(0.2)
394  ROOT.gStyle.SetPadBottomMargin(0.1)
395  c2 = ROOT.TCanvas()
396  table.GetXaxis().SetBinLabel(1,"0");
397  table.GetXaxis().SetBinLabel(2,"1");
398  table.GetXaxis().SetBinLabel(3,"2");
399  table.GetXaxis().SetBinLabel(4,"3");
400  table.GetXaxis().SetBinLabel(5,"#geq 4");
401  table.GetXaxis().SetNdivisions(400,0)
402  table.SetMaximum(20)
403  table.SetMinimum(0)
404  table.SetStats(0)
405  table.SetMarkerSize(1.5)
406  table.SetAxisRange(0.5,4.499,"X")
407  table.SetXTitle("N_{b}")
408  table.SetZTitle("Uncertainty [%]")
409  table.GetYaxis().SetTitleOffset(1.4)
410  table.GetYaxis().SetTitleSize(0.05)
411  table.GetYaxis().SetLabelSize(0.045)
412  table.GetXaxis().SetTitleSize(0.04)
413  table.Draw("colz text")
414  ROOT.gPad.SetTicks(1,0)
415  table.Draw("axis y+ same")
416  tla = ROOT.TLatex()
417  tla.SetTextSize(0.038)
418  tla.DrawLatexNDC(0.35,0.93,"#font[62]{CMS} #scale[0.8]{#font[52]{Simulation}}")
419  tla.SetTextFont(42)
420  tla.DrawLatexNDC(0.66,0.93,"#sqrt{s} = 13 TeV")
421  if one_pdf:
422  if directory == binList[0][0]:
423  outname = "plots/table_bkg_systs_all.pdf("
424  elif directory == binList[len(binList)-1][0]:
425  outname = "plots/table_bkg_systs_all.pdf)"
426  else:
427  outname = "plots/table_bkg_systs_all.pdf"
428 
429  else:
430  outname = "plots/table_bkg_systs_" + directory +".pdf"
431 
432 
433  c2.Print(outname)
434 
def set_palette_gray(ncontours=20)
def get_symmetrized_relative_errors(sysName, tot_data, total_nominal, procList, floating_process, sysFile, directory)
Definition: rpv_bkg_syst.py:39
def get_hist_with_overflow(file, histname)
Definition: rpv_bkg_syst.py:22