babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
change_weights.cxx
Go to the documentation of this file.
1 // change_weight: Produces new ntuples with the weights and systematics normalized to average 1
2 
3 #include <iostream>
4 #include <ctime>
5 #include <string>
6 #include <vector>
7 #include <iomanip> // setw
8 
9 #include "TChain.h"
10 #include "TError.h"
11 #include "TSystem.h"
12 #include "TDirectory.h"
13 
14 #include "baby_full.hh"
15 #include "utilities.hh"
16 #include "cross_sections.hh"
17 
18 using namespace std;
19 
20 namespace{
21  const int NSYSTS = 19;
22 }
23 
24 int main(int argc, char *argv[]){
25  gErrorIgnoreLevel=6000; // Turns off ROOT errors due to missing branches
26  time_t begtime, endtime;
27  time(&begtime);
28 
29  if(argc<2){
30  cout<<"Format: ./run/change_weights.exe <infolder>=. <sample>=\"*.root\" <outfolder>=infolder"<<endl<<endl<<endl;
31  return 1;
32  }
33 
34  // Take command line arguments
35  TString folder("."), sample("*.root");
36  if(argc>=2) folder=argv[1];
37  if(argc>=3) sample=argv[2];
38  TString outfolder(folder);
39  if(argc>=4) outfolder=argv[3];
40  if(!folder.EndsWith("/")) folder.Append("/");
41  if(!outfolder.EndsWith("/")) outfolder.Append("/");
42  gSystem->mkdir(outfolder, kTRUE);
43 
44  float xsec(0.);
45  if (!sample.Contains("SMS")){
46  TChain chglob("treeglobal");
47  chglob.Add((folder+sample).Data());
48  size_t foundent = chglob.GetEntries();
49  if (foundent==0) {
50  cout<<"[Change Weights] ERROR: No entries found! Exit."<<endl;
51  exit(1);
52  } else {
53  cout<<"[Change Weights] Found "<<foundent<<" entries."<<endl;
54  }
55  chglob.SetBranchAddress("xsec", &xsec);
56  chglob.GetEntry(0);
57  cout<<"Found xsec = "<<xsec<<endl;
58  }
59 
60  //Setup chains
61  baby_full ch((folder+sample).Data());
62  ch.GetEntry(0); //Set branches to get size
63 
64  vector<TString> var_type, var;
65  vector<vector<TString> > var_val(NSYSTS);
66  double nent_eff=0, nent=0, sum_weff_l0=0, sum_weff_l1=0, sum_btag=0, sum_pu=0, sum_toppt=0, sum_wisr=0;
67  // vector<double> sum_wpdf(ch.w_pdf().size(),0);
68  vector<double> sum_bctag(ch.sys_bctag().size()), sum_udsgtag(ch.sys_udsgtag().size());
69  vector<double> sum_fs_bctag(ch.sys_fs_bctag().size()), sum_fs_udsgtag(ch.sys_fs_udsgtag().size());
70  vector<double> sum_isr(ch.sys_isr().size());
71  // vector<double> sum_spdf(2,0.);
72  vector<double> sum_mur(ch.sys_mur().size()), sum_muf(ch.sys_muf().size()), sum_murf(ch.sys_murf().size());
73  double nent_zlep=0, sum_wlep=0, sum_fs_wlep=0;
74  vector<double> sum_slep(ch.sys_lep().size()), sum_fs_slep(ch.sys_fs_lep().size());
75  // double sum_pdf_min(0.);
76 
77  int nentries = ch.GetEntries();
78  //nentries = 10;
79  //Loop over events and get sum of weights
80  time(&begtime);
81  for(int ientry=0; ientry<nentries; ientry++){
82  ch.GetEntry(ientry);
83 
84  if(ientry%500000==0 || ientry==nentries-1) {
85  time(&endtime);
86  int seconds(difftime(endtime,begtime));
87  cout<<"Doing entry "<<setw(10)<<addCommas(ientry+1)<<" of "<<addCommas(nentries)
88  <<" Took "<<setw(6)<<seconds<<" seconds at "
89  <<setw(4)<<roundNumber(ientry,1,seconds*1000.)<<" kHz"<<endl;
90  }
91  float lsign = ch.w_lumi()>0 ? 1:-1;
92  nent_eff += lsign;
93  nent++;
94 
95  // nent_eff ++;
96  sum_btag += noNaN(ch.w_btag());
97  sum_pu += noNaN(ch.w_pu());
98  sum_toppt += noNaN(ch.w_toppt());
99  // for(unsigned int isys=0;isys<ch.w_pdf().size();isys++) sum_wpdf[isys] += noNaN(ch.w_pdf().at(isys));
100  for(unsigned int isys=0;isys<ch.sys_bctag().size();isys++) sum_bctag[isys] += noNaN(ch.sys_bctag().at(isys));
101  for(unsigned int isys=0;isys<ch.sys_udsgtag().size();isys++) sum_udsgtag[isys] += noNaN(ch.sys_udsgtag().at(isys));
102  for(unsigned int isys=0;isys<ch.sys_fs_bctag().size();isys++) sum_fs_bctag[isys] += noNaN(ch.sys_fs_bctag().at(isys));
103  for(unsigned int isys=0;isys<ch.sys_fs_udsgtag().size();isys++) sum_fs_udsgtag[isys] += noNaN(ch.sys_fs_udsgtag().at(isys));
104  for(unsigned int isys=0;isys<ch.sys_mur().size();isys++) sum_mur[isys] += noNaN(ch.sys_mur().at(isys));
105  for(unsigned int isys=0;isys<ch.sys_muf().size();isys++) sum_muf[isys] += noNaN(ch.sys_muf().at(isys));
106  for(unsigned int isys=0;isys<ch.sys_murf().size();isys++) sum_murf[isys] += noNaN(ch.sys_murf().at(isys));
107 
108  float w_isr=1.;
109  if (sample.Contains("SMS") && !sample.Contains("TChi")){
110  vector<float> sys_isr;
111  w_isr = wISR(ch.nisr(), sys_isr);
112  for(unsigned int isys=0;isys<ch.sys_isr().size();isys++) sum_isr[isys] += sys_isr[isys];
113  // cout<<"nisr "<<ch.nisr()<<", w_isr "<< wISR(ch.nisr(), sys_isr)<<", sys_isr[0] "<<sys_isr[0]<<endl;
114  } else {// if strong SUSY, need to fix w_isr
115  w_isr = noNaN(ch.w_isr());
116  for(unsigned int isys=0;isys<ch.sys_isr().size();isys++) sum_isr[isys] += noNaN(ch.sys_isr().at(isys));
117  }
118  sum_wisr += w_isr;
119 
120  double weight = noNaN(ch.w_lep()) * noNaN(ch.w_fs_lep()) * noNaN(ch.w_btag()) * w_isr * noNaN(ch.w_pu());
121  //Lepton weights
122  if(ch.nleps()==0) {
123  nent_zlep += 1;
124  sum_weff_l0 += weight;
125  } else{
126  sum_weff_l1 += weight;
127  sum_wlep += ch.w_lep();
128  sum_fs_wlep += ch.w_fs_lep();
129  for(unsigned int isys=0;isys<ch.sys_lep().size();isys++) sum_slep[isys] += ch.sys_lep().at(isys);
130  for(unsigned int isys=0;isys<ch.sys_fs_lep().size();isys++) sum_fs_slep[isys] += ch.sys_fs_lep().at(isys);
131  }
132  }
133  // // Hack to recompute sys_pdf[1] which had a 1e-3 cut
134  // sum_spdf[1] = sum_pdf_min;
135 
136  const float luminosity = 1000.;
137  if (sample.Contains("SMS")){
138  float exsec(0.);
139  if(sample.Contains("T1") || sample.Contains("T5")) xsec::signalCrossSection(ch.mgluino(), xsec, exsec);
140  else if(sample.Contains("TChiHH")) xsec::higgsinoCrossSection(ch.mgluino(), xsec, exsec);
141  else xsec::stopCrossSection(ch.mgluino(), xsec, exsec);
142  }
143  float w_lumi = xsec*luminosity / nent_eff;
144  //float w_lumi_corr = w_lumi / fabs(ch.w_lumi());
145 
146  // Average w_toppt in bins of ht_isr_me found in inclusive TTJets (SingleLept and DiLept)
147  double wanted_toppt(1.);
148  if(sample.Contains("TTJets_HT-600to800")) wanted_toppt = 0.8577;
149  if(sample.Contains("TTJets_HT-800to1200")) wanted_toppt = 0.8352;
150  if(sample.Contains("TTJets_HT-1200to2500")) wanted_toppt = 0.8201;
151  if(sample.Contains("TTJets_HT-2500toInf")) wanted_toppt = 0.8198;
152 
153  // Average values found with find_w_isr.cxx
154  double wanted_w_isr(1.);
155  vector<double> wanted_sys_isr(2,1.);
156  if(sample.Contains("TTJets_HT-600to800")) {
157  wanted_w_isr = 0.7838;
158  wanted_sys_isr[0] = 0.8965;
159  wanted_sys_isr[1] = 0.6604;
160  }
161  if(sample.Contains("TTJets_HT-800to1200")) {
162  wanted_w_isr = 0.7600;
163  wanted_sys_isr[0] = 0.8851;
164  wanted_sys_isr[1] = 0.6230;
165  }
166  if(sample.Contains("TTJets_HT-1200to2500")) {
167  wanted_w_isr = 0.7365;
168  wanted_sys_isr[0] = 0.8739;
169  wanted_sys_isr[1] = 0.5861;
170  }
171  if(sample.Contains("TTJets_HT-2500toInf")) {
172  wanted_w_isr = 0.7254;
173  wanted_sys_isr[0] = 0.8686;
174  wanted_sys_isr[1] = 0.5687;
175  }
176 
177  time(&endtime);
178  int seconds = difftime(endtime, begtime);
179  float hertz = nentries; hertz /= seconds;
180  cout<<"[Change Weights] Completed in "<<seconds<<" seconds ("<<hoursMinSec(seconds)<<") for "<<nentries
181  <<" events -> "<<roundNumber(hertz,1,1000)<<" kHz, "<<roundNumber(1000,2,hertz)<<" ms per event"<<endl<<endl;
182 
183  //Set type and var name
184  var_type.push_back("float"); var.push_back("weight");
185  var_type.push_back("float"); var.push_back("w_btag");
186  var_type.push_back("float"); var.push_back("w_pu");
187  var_type.push_back("float"); var.push_back("w_toppt");
188  var_type.push_back("float"); var.push_back("w_isr");
189  // var_type.push_back("vfloat"); var.push_back("w_pdf");
190  var_type.push_back("vfloat"); var.push_back("sys_bctag");
191  var_type.push_back("vfloat"); var.push_back("sys_udsgtag");
192  var_type.push_back("vfloat"); var.push_back("sys_fs_bctag");
193  var_type.push_back("vfloat"); var.push_back("sys_fs_udsgtag");
194  var_type.push_back("vfloat"); var.push_back("sys_isr");
195  var_type.push_back("vfloat"); var.push_back("sys_pdf");
196  var_type.push_back("vfloat"); var.push_back("sys_mur");
197  var_type.push_back("vfloat"); var.push_back("sys_muf");
198  var_type.push_back("vfloat"); var.push_back("sys_murf");
199  var_type.push_back("float"); var.push_back("w_lep");
200  var_type.push_back("float"); var.push_back("w_fs_lep");
201  var_type.push_back("vfloat"); var.push_back("sys_lep");
202  var_type.push_back("vfloat"); var.push_back("sys_fs_lep");
203  var_type.push_back("float"); var.push_back("w_lumi");
204 
205  // cout<<"sum_weff "<<sum_weff<<", wcorr "<<nent_eff*w_lumi_corr/sum_weff<<", nent "<<nent_eff<<
206  // ", w_lumi_corr "<<w_lumi_corr<<endl;
207  //Calculate weights
208  float w_corr_l0 = (nent-sum_wlep)/nent_zlep * (nent-sum_fs_wlep)/nent_zlep;
209  if(nent_zlep==0) w_corr_l0 = 1.;
210  var_val[0].push_back("*"+to_string(nent*wanted_w_isr/(sum_weff_l0*w_corr_l0 + sum_weff_l1)));
211  var_val[1].push_back("*"+to_string(nent/sum_btag));
212  var_val[2].push_back("*"+to_string(nent/sum_pu));
213  var_val[3].push_back("*"+to_string(nent*wanted_toppt/sum_toppt));
214  var_val[4].push_back("*"+to_string(nent*wanted_w_isr/sum_wisr));
215  // for(unsigned int idx=0;idx<sum_wpdf.size();idx++) var_val[4].push_back("*"+to_string(nent/sum_wpdf[idx]));
216  for(unsigned int idx=0;idx<sum_bctag.size();idx++) var_val[5].push_back("*"+to_string(nent/sum_bctag[idx]));
217  for(unsigned int idx=0;idx<sum_udsgtag.size();idx++) var_val[6].push_back("*"+to_string(nent/sum_udsgtag[idx]));
218  for(unsigned int idx=0;idx<sum_fs_bctag.size();idx++) var_val[7].push_back("*"+to_string(nent/sum_fs_bctag[idx]));
219  for(unsigned int idx=0;idx<sum_fs_udsgtag.size();idx++) var_val[8].push_back("*"+to_string(nent/sum_fs_udsgtag[idx]));
220  for(unsigned int idx=0;idx<sum_isr.size();idx++) var_val[9].push_back("*"+to_string(nent*wanted_sys_isr[idx]/sum_isr[idx]));
221  // for(unsigned int idx=0;idx<sum_spdf.size();idx++) var_val[10].push_back("*"+to_string(nent/sum_spdf[idx]));
222  for(unsigned int idx=0;idx<sum_mur.size();idx++) var_val[11].push_back("*"+to_string(nent/sum_mur[idx]));
223  for(unsigned int idx=0;idx<sum_muf.size();idx++) var_val[12].push_back("*"+to_string(nent/sum_muf[idx]));
224  for(unsigned int idx=0;idx<sum_murf.size();idx++) var_val[13].push_back("*"+to_string(nent/sum_murf[idx]));
225  //Calculate lepton weights
226  var_val[14].push_back("*"+to_string((nent-sum_wlep)/nent_zlep));
227  var_val[15].push_back("*"+to_string((nent-sum_fs_wlep)/nent_zlep));
228  for(unsigned int idx=0;idx<sum_slep.size();idx++)
229  var_val[16].push_back("*"+to_string((nent-sum_slep[idx])/nent_zlep));
230  for(unsigned int idx=0;idx<sum_fs_slep.size();idx++)
231  var_val[17].push_back("*"+to_string((nent-sum_fs_slep[idx])/nent_zlep));
232  var_val[18].push_back(roundNumber(w_lumi,12));
233 
234 
235  int totentries(0);
236  vector<TString> files = dirlist(folder,sample);
237  for(unsigned int i=0; i<files.size(); i++){
238  cout<<"[Change Weights] File "<<i+1<<"/"<<files.size()<<": "<<files[i]<<endl;
239  totentries += change_branch_one(folder, files[i], outfolder, var_type, var, var_val, nentries);
240  }
241 
242  time(&endtime);
243  seconds = difftime(endtime, begtime);
244  hertz = totentries; hertz /= seconds;
245  cout<<endl<<"Took "<<seconds<<" seconds ("<<hoursMinSec(seconds)<<") for "<<totentries
246  <<" events -> "<<roundNumber(hertz,1,1000)<<" kHz, "<<roundNumber(1000,2,hertz)<<" ms per event"<<endl<<endl;
247 }
long GetEntries() const
Definition: baby_base.cc:3310
TString hoursMinSec(long seconds)
Definition: utilities.cc:599
int const & mgluino() const
Definition: baby_base.cc:5605
float const & w_lumi() const
Definition: baby_base.cc:5528
float const & w_fs_lep() const
Definition: baby_base.cc:5495
std::vector< float > const & sys_muf() const
Definition: baby_base.cc:7222
int nent
Definition: get_flist.py:69
std::vector< float > const & sys_fs_bctag() const
Definition: baby_base.cc:7068
std::vector< float > const & sys_fs_lep() const
Definition: baby_base.cc:7090
void stopCrossSection(int stop_mass, float &xsec, float &xsec_unc)
STL namespace.
std::vector< float > const & sys_lep() const
Definition: baby_base.cc:7156
float const & w_isr() const
Definition: baby_base.cc:5506
void higgsinoCrossSection(int hig_mass, float &xsec, float &xsec_unc)
float const & nisr() const
Definition: baby_base.cc:5286
void signalCrossSection(int glu_mass, float &xsec, float &xsec_unc)
TString addCommas(double num)
Definition: utilities.cc:499
TString roundNumber(double num, int decimals, double denom=1.)
Definition: utilities.cc:478
T noNaN(T val, T defval=1.)
Definition: utilities.hh:61
std::vector< float > const & sys_isr() const
Definition: baby_base.cc:7145
std::vector< float > const & sys_mur() const
Definition: baby_base.cc:7233
int change_branch_one(TString indir, TString name, TString outdir, std::vector< TString > var_type, std::vector< TString > var, std::vector< std::vector< TString > > var_val, int totentries)
Definition: utilities.cc:32
int main(int argc, char *argv[])
std::vector< float > const & sys_udsgtag() const
Definition: baby_base.cc:7299
virtual void GetEntry(const long entry)
Definition: baby_full.cc:38
std::vector< float > const & sys_fs_udsgtag() const
Definition: baby_base.cc:7101
float const & w_lep() const
Definition: baby_base.cc:5517
std::vector< float > const & sys_murf() const
Definition: baby_base.cc:7244
float const & w_pu() const
Definition: baby_base.cc:5539
std::vector< float > const & sys_bctag() const
Definition: baby_base.cc:7035
tuple ch
Definition: nev_check.py:38
tuple chglob
Definition: nev_check.py:39
float const & w_toppt() const
Definition: baby_base.cc:5550
float const & w_btag() const
Definition: baby_base.cc:5462
float wISR(int nisr, std::vector< float > &sys_isr)
Definition: utilities.cc:612
std::vector< TString > dirlist(const TString &folder, const TString &inname="dir", const TString &tag="")
Definition: utilities.cc:287
int const & nleps() const
Definition: baby_base.cc:5836