ra4_macros  bede988c286599a3a84b77a4d788ac0a971e89f9
table_prediction.cxx
Go to the documentation of this file.
1 // table_yields: Generates a LaTeX file with a cutflow table for RA4
2 
3 #include <fstream>
4 #include <iostream>
5 #include <cmath>
6 #include <string>
7 #include <sstream>
8 #include <stdlib.h>
9 #include "TChain.h"
10 #include "TFile.h"
11 #include "TString.h"
12 #include "TH1D.h"
13 #include "TObject.h"
14 #include "TObjString.h"
15 #include "TObjArray.h"
16 #include "TMath.h"
17 #include "RooStats/NumberCountingUtils.h"
18 
19 #include "styles.hpp"
20 #include "utilities.hpp"
21 #include "utilities_macros.hpp"
22 
23 namespace {
24  TString ntuple_date("2015_05_25");
25  TString luminosity=/*"0.135"*/"0.832";
26  bool do_1b=false;
27  bool do_2l=false;
28  bool do_ttbaronly = false;
29  bool do_note=true;
30  bool do_zbi=false;
31 
32  int method = 1; // Only methods 1 and 3 currently supported
33 }
34 
35 using namespace std;
36 using std::cout;
37 using std::endl;
38 
39 TString YieldsCut(TString title, TString cuts, vector<TChain*> chain, vector<sfeats> Samples, int nsig);
40 float GetNdata(TString oneline, bool doMC);
41 
42 int main(){
43 
44  // Reading ntuples
45  TString folder, folder_ns, folder_1ldata;
46  folder = "/net/cms2/cms2r0/babymaker/babies/2015_10_19/mc/skim_1lht400mc/";
47  folder_1ldata = "/net/cms2/cms2r0/babymaker/babies/2015_10_19/data/singlelep/combined/skim_1lht400/";
48  vector<TString> s_trig_sl;
49  s_trig_sl.push_back(folder_1ldata+"/*Single*");
50 
51  vector<TString> s_tt;
52  s_tt.push_back(folder+"*_TTJets*Lept*");
53  s_tt.push_back(folder+"*_TTJets_HT*");
54  vector<TString> s_wjets;
55  s_wjets.push_back(folder+"*_WJetsToLNu*");
56  vector<TString> s_ttv;
57  s_ttv.push_back(folder+"*_TTWJets*");
58  s_ttv.push_back(folder+"*_TTZTo*");
59  vector<TString> s_single;
60  s_single.push_back(folder+"*_ST_*");
61  vector<TString> s_other;
62  s_other.push_back(folder+"*DYJetsToLL*");
63  s_other.push_back(folder+"*_QCD_HT*");
64  s_other.push_back(folder+"*_ZJet*");
65  s_other.push_back(folder+"*_WWTo*");
66  s_other.push_back(folder+"*ggZH_HToBB*");
67  s_other.push_back(folder+"*ttHJetTobb*");
68  vector<TString> s_t1t;
69  s_t1t.push_back(folder+"*T1tttt*1500_*");
70  vector<TString> s_t1tc;
71  s_t1tc.push_back(folder+"*T1tttt*1200_*");
72 
73  vector<TChain *> chain;
74  vector<sfeats> Samples;
75  Samples.push_back(sfeats(s_trig_sl, "Data", 1003,1,"(trig[4]||trig[8])&&pass&&(mt<140||(mt>140&&mj<400))"));
76  if(!do_ttbaronly){
77  Samples.push_back(sfeats(s_other, "Other", 1001));
78  Samples.push_back(sfeats(s_ttv, "$t\\bar{t}V$", 1002));
79  Samples.push_back(sfeats(s_single, "Single $t$", 1005));
80  Samples.push_back(sfeats(s_wjets, "W+jets", 1004));
81  }
82  Samples.push_back(sfeats(s_tt, "$t\\bar{t}$ (1$\\ell$)", 1000,1,
83  "ntruleps<=1"));
84  Samples.push_back(sfeats(s_tt, "$t\\bar{t}$ ($2\\ell$)", 1006,1,
85  "ntruleps>=2"));
86 
87  int nsig(0);
88 
89  for(unsigned sam(0); sam < Samples.size(); sam++){
90  chain.push_back(new TChain("tree"));
91  for(unsigned insam(0); insam < Samples[sam].file.size(); insam++)
92  chain[sam]->Add(Samples[sam].file[insam]);
93  }
94 
95  // 2l
96  vector<TChain *> chain_2l;
97  vector<sfeats> Samples_2l;
98  Samples_2l.push_back(sfeats(s_trig_sl, "Data", 1003,1,"(trig[4]||trig[8])&&pass"));
99  if(!do_ttbaronly){
100  Samples_2l.push_back(sfeats(s_other, "Other", 1001));
101  Samples_2l.push_back(sfeats(s_ttv, "$t\\bar{t}V$", 1002));
102  Samples_2l.push_back(sfeats(s_single, "Single $t$", 1005));
103  Samples_2l.push_back(sfeats(s_wjets, "W+jets", 1004));
104  }
105  Samples_2l.push_back(sfeats(s_tt, "$t\\bar{t}$ (1$\\ell$)", 1000,1,
106  "ntruleps<=1"));
107  Samples_2l.push_back(sfeats(s_tt, "$t\\bar{t}$ ($2\\ell$)", 1006,1,
108  "ntruleps>=2"));
109 
110  for(unsigned sam(0); sam < Samples_2l.size(); sam++){
111  chain_2l.push_back(new TChain("tree"));
112  for(unsigned insam(0); insam < Samples_2l[sam].file.size(); insam++)
113  chain_2l[sam]->Add(Samples_2l[sam].file[insam]);
114  }
115 
116  TString minjets("7"), midjets("8"), mjthresh("600"), highmet("400");
117  if(method==1) { minjets = "6"; midjets="99"; mjthresh="400"; highmet="9999";}
118  if(method==3) { minjets ="6"; midjets="8"; mjthresh="400"; highmet="400";}
119 
120 
121  TString minjets_2l(""), midjets_2l("");
122  minjets_2l += (minjets.Atoi()-1); midjets_2l += (midjets.Atoi()-1);
123  TString fom("$Z_{\\rm bi}$");
124  if(!do_zbi) fom = "S/B";
125  TString outname = "txt/yields_mj"+mjthresh+"_met"+highmet+"_njets"+minjets+midjets+"_lumi"+luminosity+"_"+ntuple_date+".tex";
126  if(do_1b) outname.ReplaceAll("yields","yields_1b");
127  if(do_2l) outname.ReplaceAll("yields","yields_2l");
128  if(do_zbi) outname.ReplaceAll("yields","yields_zbi");
129 
130  TString baseline("ht>450&&met>200");
131  TString cuts_1l(baseline+"&&(nmus+nels)==1&&nbm>=1&&njets>="+minjets+"");
132  TString cuts_1l1b(baseline+"&&(nmus+nels)==1&&nbm==1&&njets>="+minjets+"");
133  TString cuts_2l("ht>450&&met>200&&(nmus+nels)==2&&njets>="+minjets_2l+"");
134  TString cuts_2lbb("ht>500&&met>200&&(nmus+nels)==2&&njets>="+minjets_2l+"");
135  TString cuts_1ltex("$H_T>450, \\mathrm{MET}>200, n_{\\rm jets}\\geq "+minjets+", n_b\\geq1, n_{\\rm lep}=1$");
136  TString cuts_1l1btex("$H_T>450, \\mathrm{MET}>200, n_{\\rm jets}\\geq "+minjets+", n_b=1, n_{\\rm lep}=1$");
137  TString cuts_2ltex("$H_T>450, \\mathrm{MET}>200, n_{\\rm jets}\\geq "+minjets_2l+", n_{\\rm lep}=2$");
138  TString cuts_2lbbtex("$H_T>450, \\mathrm{MET}>200, n_{\\rm jets}\\geq "+minjets_2l+", n_{\\rm lep}=2$");
139  if(do_1b) {
140  cuts_1l.ReplaceAll("nbm>=2","nbm==1");
141  cuts_1ltex.ReplaceAll("n_b\\geq 2", "n_b=1");
142  }
143  ifstream header("txt/header.tex");
144  ifstream footer("txt/footer.tex");
145  ofstream file(outname);
146  file<<header.rdbuf();
147  file<<"\\vspace{80 mm}\n";
148  file << "\n\\begin{tabular}[tbp!]{ l | ";
149  for(unsigned sam(0); sam < Samples.size()-nsig; sam++) file << "r";
150  file<<" | r ";
151  for(int sam(0); sam < nsig; sam++) file<<"| r ";
152  file<<"}\\hline\\hline\n";
153  file << " \\multicolumn{1}{c|}{${\\cal L} = "<<luminosity<<"$ fb$^{-1}$} ";
154  for(unsigned sam(0); sam < Samples.size()-nsig; sam++)
155  file << " & "<<Samples[sam].label;
156  file<< " & MC bkg. ";
157  for(unsigned sam(Samples.size()-nsig); sam < Samples.size(); sam++)
158  file << " & "<<Samples[sam].label;//<< " & "+fom;
159  file << "\\\\ \\hline \n ";
160 
161  TString mtcut, mjcut;
162  TString indent("\\hspace{5 mm} ");
163  if(do_note) indent = "";
164  vector<TString> binnames;
165  for(unsigned bin(1); bin <= 6; bin++)
166  if(do_note) binnames.push_back("");
167  else {
168  TString name("Bin "); name += bin; name += ": ";
169  binnames.push_back(name);
170  }
171  TString higjets(""); higjets += (midjets.Atoi()+1);
172  TString lownj("$n_j\\leq "+midjets+"$");
173  TString hignj("$n_j\\geq "+higjets+"$");
174  TString lownj_2l("$n_j\\leq "+midjets_2l+"$");
175  TString higjets_2l(""); higjets_2l += (midjets_2l.Atoi()+1);
176  TString hignj_2l("$n_j\\geq "+higjets_2l+"$");
177  TString lowmet("$\\text{MET}\\leq "+highmet+"$");
178  TString higmet("$\\text{MET}> "+highmet+"$");
179  TString letter("R");
180  if(do_1b) letter = "B";
181  if(do_2l) letter = "D";
182  TString regions[4];
183  float Ndata[4];
184  float Nmc[4];
185  TString onelineyields[4];
186  for(int ind(0); ind<4; ind++) {regions[ind] = letter; regions[ind] += (ind+1);}
187 
189  file << " \\multicolumn{"<< Samples.size()+1<<"}{c}{"<< cuts_1ltex <<"} \\\\ \\hline \\hline\n";
190 
191  mjcut="mj<="+mjthresh; mtcut="mt<=140";
192  onelineyields[0] = YieldsCut(regions[0]+": $m_T \\leq 140,M_J\\leq "+mjthresh+"$", mjcut+"&&"+mtcut+"&&"+cuts_1l, chain, Samples, nsig);
193  file << onelineyields[0];
194  file <<"\\hline\n";
195 
196  mjcut="mj>"+mjthresh; mtcut="mt<=140";
197  onelineyields[1] = YieldsCut(regions[1]+": $m_T \\leq 140,M_J> "+mjthresh+"$", mjcut+"&&"+mtcut+"&&"+cuts_1l, chain, Samples, nsig);
198  file << onelineyields[1];
199  file <<"\\hline\n";
200 
201  mjcut="mj<="+mjthresh; mtcut="mt>140";
202  onelineyields[2] = YieldsCut(regions[2]+": $m_T > 140,M_J \\leq "+mjthresh+"$", mjcut+"&&"+mtcut+"&&"+cuts_1l, chain, Samples, nsig);
203  file << onelineyields[2];
204  file <<"\\hline\n";
205 
206  mjcut="mj>"+mjthresh; mtcut="mt>140";
207  onelineyields[3] = YieldsCut(regions[3]+": $m_T > 140,M_J > "+mjthresh+"$", mjcut+"&&"+mtcut+"&&"+cuts_1l, chain, Samples, nsig);
208  file << onelineyields[3];
209  file <<"\\hline\n";
210 
211  // Do backgrond prediction
212  // data = N2*N3/N1
213  // mc = (N4/N3/(N2/N1) : ignore for this bc negligible
214  Ndata[0] = GetNdata(onelineyields[0], 0);
215  Ndata[1] = GetNdata(onelineyields[1], 0);
216  Ndata[2] = GetNdata(onelineyields[2], 0);
217  Ndata[3] = GetNdata(onelineyields[3], 0);
218  Nmc[0] = GetNdata(onelineyields[0], 1);
219  Nmc[1] = GetNdata(onelineyields[1], 1);
220  Nmc[2] = GetNdata(onelineyields[2], 1);
221  Nmc[3] = GetNdata(onelineyields[3], 1);
222 
223  cout << Ndata[0] << " " << Ndata[1] << " " << Ndata[2] << " " << Ndata[3] << endl;
224  cout << Nmc[0] << " " << Nmc[1] << " " << Nmc[2] << " " << Nmc[3] << endl;
225 
226  // mc
227  float kappa = (Nmc[3]/Nmc[2])/(Nmc[1]/Nmc[0]);
228  // data
229  vector<vector<float> > entries, weights;
230  for(int i=0; i<4; i++){
231  vector<float> tmp;
232  if(i<3) tmp.push_back(Ndata[i]);
233  else tmp.push_back(1);
234  vector<float> tmpweights;
235  tmpweights.push_back(1);
236 
237  entries.push_back(tmp);
238  weights.push_back(tmpweights);
239  }
240  vector<float> powersk; powersk.push_back(-1); powersk.push_back(1); powersk.push_back(1); //powersk.push_back(-1);
241  float mSigma, pSigma;
242  float kappaData = calcKappa(entries, weights, powersk, mSigma, pSigma, true, true);
243 
244  cout << "MC : " << kappa << endl;
245  cout << "Data : " << kappaData << " " << mSigma << " " << pSigma << endl;
246 
247  cout << "Precition : " << kappa*kappaData << " +" << kappa*pSigma << " -" << kappa*mSigma << endl;
248 
249  file << "Data driven prediction & \\multicolumn{"<<Samples.size()-1 << "}{c}{" << Form("$%.1f^{+%.1f}_{-%.1f}$", kappa*kappaData, kappa*pSigma, kappa*mSigma) << "} \\\\ \n";
250 
252  file << "\\hline \\hline\\multicolumn{"<< Samples.size()+1<<"}{c}{"
253  <<cuts_2ltex <<"} \\\\ \\hline \\hline\n";
254 
255  mjcut="mj<="+mjthresh;
256  onelineyields[2] = YieldsCut("D3: $M_J \\leq "+mjthresh+"$", "mj <= "+mjthresh+"&&"+cuts_2l, chain_2l, Samples_2l, nsig);
257  file << onelineyields[2];
258  file <<"\\hline\n";
259 
260 
261  mjcut="mj>"+mjthresh;
262  onelineyields[3] = YieldsCut("D4: $M_J > "+mjthresh+"$", "mj>"+mjthresh+"&&"+cuts_2l, chain_2l, Samples_2l, nsig);
263  file << onelineyields[3];
264  file <<"\\hline\n";
265 
266  // closure
267  Ndata[2] = GetNdata(onelineyields[2], 0);
268  Ndata[3] = GetNdata(onelineyields[3], 0);
269  Nmc[2] = GetNdata(onelineyields[2], 1);
270  Nmc[3] = GetNdata(onelineyields[3], 1);
271 
272  cout << Ndata[0] << " " << Ndata[1] << " " << Ndata[2] << " " << Ndata[3] << endl;
273  cout << Nmc[0] << " " << Nmc[1] << " " << Nmc[2] << " " << Nmc[3] << endl;
274 
275  // mc
276  kappa = (Nmc[3]/Nmc[2])/(Nmc[1]/Nmc[0]);
277  // data
278  entries.pop_back(); entries.pop_back();
279  weights.pop_back(); weights.pop_back();
280  for(int i=2; i<4; i++){
281  vector<float> tmp;
282  if(i<3) tmp.push_back(Ndata[i]);
283  else tmp.push_back(1);
284  vector<float> tmpweights;
285  tmpweights.push_back(1);
286 
287  entries.push_back(tmp);
288  weights.push_back(tmpweights);
289  }
290  kappaData = calcKappa(entries, weights, powersk, mSigma, pSigma, true, true);
291 
292  cout << "MC : " << kappa << endl;
293  cout << "Data : " << kappaData << " " << mSigma << " " << pSigma << endl;
294 
295  file << "Data driven prediction & \\multicolumn{"<<Samples.size()-1 << "}{c}{" << Form("$%.1f^{+%.1f}_{-%.1f}$", kappa*kappaData, kappa*pSigma, kappa*mSigma) << "} \\\\ \n";
296 
297  file << "\\hline\\multicolumn{1}{c|}{} ";
298  for(unsigned sam(0); sam < Samples.size()-nsig; sam++)
299  file << " & "<<Samples[sam].label;
300  file<< " & MC bkg. ";
301  for(unsigned sam(Samples.size()-nsig); sam < Samples.size(); sam++)
302  file << " & "<<Samples[sam].label;//<< " & "+fom;
303  file << "\\\\ \n ";
304 
305  file<< "\\hline\\hline\n\\end{tabular}"<<endl<<endl;
306  file<<footer.rdbuf();
307  file.close();
308  cout<<endl<<"Written "<<outname<<endl;
309 }
310 
311 TString YieldsCut(TString title, TString cuts, vector<TChain*> chain, vector<sfeats> Samples, int nsig){
312  TString totCut, Hname = "histo", out;
313  vector<double> yield, error;
314  double bkg(0), bkg_err(0), err, yield_sam;
315  int nsam(chain.size()), entries(0);
316  for(int sam(0); sam < nsam; sam++){
317  if(Samples[sam].label.Contains("Data")) totCut = "("+cuts+"&&"+Samples[sam].cut+")";
318  else totCut = luminosity+"*weight*("+cuts+"&&"+Samples[sam].cut+")";
319  entries = getYieldErr(*chain[sam], totCut, yield_sam, err);
320  yield.push_back(yield_sam);
321  error.push_back(err);
322  if(sam<nsam-nsig) {
323  if(!Samples[sam].label.Contains("Data"))
324  {
325  if(yield[sam]>0) bkg += yield[sam];
326  bkg_err = sqrt(pow(bkg_err,2)+pow(err,2));
327  }
328  }
329  cout<<sam<<": yield "<<Samples[sam].label<<" "<<yield[sam]<<", n "<<entries<<" \t "<<totCut<<endl;
330  }
331 
332  cout<<title<<": B = "<<(RoundNumber(bkg,1)+" +- "+RoundNumber(bkg_err,1));
333  out = title;
334  for(int sam(0); sam < nsam-nsig; sam++) out += (" \t & " + RoundNumber(yield[sam],1));
335  out += (" \t & $"+RoundNumber(bkg,1))+" \\pm "+RoundNumber(bkg_err,1)+"$";
336  for(int sam(nsam-nsig); sam < nsam; sam++) {
337  float fracerr(sqrt(pow(bkg_err/bkg,2)+0.3*0.3+0.24*0.24));
338  out += (" \t& $" + RoundNumber(yield[sam],1)+" \\pm "+RoundNumber(error[sam],1))+"$";// + "$ \t& ");
339  // if(do_zbi) out += RoundNumber(RooStats::NumberCountingUtils::BinomialExpZ(yield[sam], bkg, fracerr),2);
340  // else out += RoundNumber(yield[sam],2,bkg);
341 
342  cout<<", S = "<<RoundNumber(yield[sam],1)+" +- "+RoundNumber(error[sam],1)<<" with Zbi = ";
343  cout<<RoundNumber(RooStats::NumberCountingUtils::BinomialExpZ(yield[sam], bkg, fracerr),2);
344  }
345  out += " \\\\ \n";
346  cout<<endl;
347  return out;
348 }
349 
350 float GetNdata(TString oneline, bool doMC){
351  int index = 1;
352  if(doMC) index = 8;
353  TObjArray* tokens = oneline.Tokenize("&");
354  TString Ndata_str(dynamic_cast<TObjString *>(tokens->At(index))->GetString());
355  delete tokens;
356 
357  if(!doMC) return atof(Ndata_str);
358  else {
359  TObjArray* tokensMC = Ndata_str.Tokenize(" ");
360  TString Nmc_str(dynamic_cast<TObjString *>(tokensMC->At(0))->GetString());
361  delete tokensMC;
362  Nmc_str.ReplaceAll("$","");
363  return atof(Nmc_str);
364  }
365 }
366 
367 
long getYieldErr(TChain &tree, TString cut, double &yield, double &uncertainty)
STL namespace.
TString YieldsCut(TString title, TString cuts, vector< TChain * > chain, vector< sfeats > Samples, int nsig)
tuple kappa
Definition: parse_card.py:264
TString RoundNumber(double num, int decimals, double denom=1.)
Definition: utilities.cpp:191
tuple file
Definition: parse_card.py:238
int main()
float GetNdata(TString oneline, bool doMC)
double calcKappa(std::vector< std::vector< float > > &entries, std::vector< std::vector< float > > &weights, std::vector< float > &powers, float &mSigma, float &pSigma, bool do_data=false, bool verbose=false, bool do_plot=false, int nrep=100000)