ra4_macros  bede988c286599a3a84b77a4d788ac0a971e89f9
make_rpv_datacard.cxx
Go to the documentation of this file.
1 // makes a datacard for RPV analysis with or without PDF uncertainties
2 #include <fstream>
3 #include <iostream>
4 #include <map>
5 #include <sstream>
6 #include <string>
7 
8 #include "TH1.h"
9 #include "TString.h"
10 #include "TFile.h"
11 #include "TSystem.h"
12 
13 void outputNormSharing(std::ofstream &file, const std::vector<std::string> &bins);
14 void outputShapeSystematics(std::ofstream &file, const std::vector<std::string> shapeSysts);
15 void outputLognormalSystematics(std::ofstream &file);
16 void outputMCStatisticsSyst(std::ofstream &file, const std::vector<std::string> &bins, const std::string & signalBinName);
17 // determine if a histogram has an entry for a given nB
18 bool hasEntry(const std::string &sample, const std::string &bin, const int nB);
19 
20 namespace {
21  unsigned int nbins;
22  unsigned int nprocesses;
23 }
24 
25 using namespace std;
26 
27 int main(int argc, char *argv[])
28 {
29  bool includePDFUncert = true;
30  bool includeLowMJ = false;
31 // bool includeSignalRegion = true;
32 
33  // signal is added later
34  std::vector<std::string> processes = { "qcd", "ttbar", "wjets", "other"};
35  std::vector<std::string> shapeSysts = {"btag_bc", "btag_udsg",
36  "gs45", "gs67", "gs89", "gs10Inf",
37  "jes", "jer",
38  "pileup","lep_eff", "ttbar_pt",
39  "qcd_flavor",
40  "qcd_muf", "qcd_mur", "qcd_murf",
41  "isr",
42  "ttbar_muf", "ttbar_mur", "ttbar_murf",
43  "wjets_muf", "wjets_mur", "wjets_murf",
44  "other_muf", "other_mur", "other_murf",
45  "fs_btag_bc", "fs_btag_udsg", "fs_lep_eff"};
46 
47  std::string gluinoMass;
48  std::string signalBinName;
49  std::string cardType;
50  if(argc<3) {
51  std::cout << "Syntax: make_rpv_datacard.exe [gluino mass, in GeV] [default/control]" << std::endl;
52  return 1;
53  }
54  else {
55  std::stringstream ss;
56  gluinoMass = argv[1];
57  ss << "signal_M" << gluinoMass;
58  signalBinName = ss.str();
59  // this is supposed to be the first entry in the process list
60  processes.insert(processes.begin(), signalBinName);
61 
62  cardType=argv[2];
63  if(cardType!="control" && cardType!="default" && cardType!="mconly") {
64  std::cout << "Syntax: make_rpv_datacard.exe [gluino mass, in GeV] [default/control/mconly]" << std::endl;
65  return 1;
66  }
67 // else {
68 // if(cardType=="control") includeSignalRegion=false;
69 // if(cardType=="default") includeSignalRegion=true;
70 // if(cardType=="mconly") includeSignalRegion=true;
71 // }
72  }
73 
74  nprocesses=processes.size();
75 
76  std::vector<std::string> bins = {"bin0", "bin1", "bin2",
77  "bin3", "bin4", "bin5"};
78 
79  if(includeLowMJ) {
80  bins.push_back("bin6");
81  bins.push_back("bin7");
82  bins.push_back("bin8");
83  bins.push_back("bin9");
84  }
85  if(cardType=="default" || cardType=="mconly") {
86  bins.push_back("bin10");
87  bins.push_back("bin11");
88  bins.push_back("bin12");
89  bins.push_back("bin13");
90  bins.push_back("bin14");
91  bins.push_back("bin15");
92  bins.push_back("bin16");
93  bins.push_back("bin17");
94  }
95  nbins = bins.size();
96 
97  std::string dataCardPath = gSystem->pwd();
98  if(cardType=="mconly") dataCardPath += "/variations/sum_rescaled_mconly.root";
99  else dataCardPath += "/variations/sum_rescaled.root";
100  TFile *variations = TFile::Open(dataCardPath.c_str());
101  std::ofstream file;
102  std::string filename("datacard_M");
103  filename+=gluinoMass;
104  if(cardType=="control") filename+="_control";
105  else if(cardType=="mconly") filename+="_mconly";
106 
107  if(includePDFUncert) {
108  for(unsigned int i=0; i<100; i++) {
109  TString pdf(Form("w_pdf%d", i));
110  shapeSysts.push_back(pdf.Data());
111  }
112  }
113  else {
114  filename+="_nopdf";
115  }
116  filename+=".dat";
117  file.open(filename);
118 
119  // output header
120  file << "imax " << nbins << " number of bins" << std::endl;
121  file << "jmax " << nprocesses-1 << std::endl;
122  file << "kmax * number of nuisance parameters" << std::endl;
123  file << "------------------------------------" << std::endl;
124 
125  for(unsigned int ibin=0; ibin<nbins; ibin++) {
126  file << "shapes * " << bins.at(ibin) << " " << dataCardPath << " " << bins.at(ibin)
127  << "/$PROCESS " << bins.at(ibin) << "/$PROCESS_$SYSTEMATIC" << std::endl;
128  }
129  file << "------------------------------------" << std::endl;
130  file << "bin ";
131 
132  for(unsigned int ibin=0; ibin<nbins; ibin++) {
133  variations->cd(bins.at(ibin).c_str());
134  file << bins.at(ibin) << " ";
135  }
136  file << "\n";
137  file << "observation ";
138  for(unsigned int ibin=0; ibin<nbins; ibin++) {
139  TString binName(bins.at(ibin));
140  TH1F *hist = static_cast<TH1F*>(variations->Get(Form("%s/data_obs",binName.Data())));
141  file << hist->Integral() << " ";
142  hist->Delete();
143  }
144  file << "\n";
145  file << "------------------------------------" << std::endl;
146  file << "bin ";
147  for(unsigned int ibin=0; ibin<nbins; ibin++) {
148  for(unsigned int iprocess=0; iprocess<nprocesses; iprocess++) {
149  file << bins.at(ibin) << " ";
150  }
151  }
152  file << "\n";
153  file << "process ";
154  for(unsigned int index=0; index<nbins*nprocesses; index++) file << processes.at(index%nprocesses) << " ";
155  file << "\n";
156  file << "process ";
157  for(unsigned int index=0; index<nbins*nprocesses; index++) file << index%nprocesses << " ";
158 
159  file << "\n";
160  file << "rate ";
161  for(unsigned int ibin=0; ibin<nbins; ibin++) {
162  for(unsigned int iprocess=0; iprocess<nprocesses; iprocess++) {
163  TString histName(Form("%s/%s", bins.at(ibin).c_str(), processes.at(iprocess).c_str()));
164  TH1F *hist = static_cast<TH1F*>(variations->Get(histName));
165  file << hist->Integral() << " ";
166  }
167  }
168  file << "\n------------------------------------" << std::endl;
169 
170  //output the normalization sharing between lepton bins
171  outputNormSharing(file, bins);
172 
173  // output shape systematics
174  outputShapeSystematics(file, shapeSysts);
175 
176  // output lognormal lumi uncertainties for signal, wjets and other
178 
179  // output MC statistics nuisance parameters
180  outputMCStatisticsSyst(file, bins, signalBinName);
181 
182  file.close();
183 
184 }
185 
186 // Assumes that processes is of the format {signal, "qcd", "ttbar", ... }
187 void outputNormSharing(std::ofstream &file, const std::vector<std::string> &bins){
188 
189  //create map between bin name and bin index
190  map<string, int> bindex;
191  for(uint ibin=0; ibin<nbins; ibin++)
192  bindex[bins[ibin]]=ibin;
193 
194  //create template line
195  TString line;
196  for(uint idash=0; idash<(nprocesses*nbins); idash++)
197  line+="- ";
198 
199  TString tmpLine;
200  for(auto jbin:bins){
201  tmpLine = line;
202 
203  if(jbin=="bin0"){
204  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
205  tmpLine.Prepend("normqcd_bin0 lnU ");
206  file << tmpLine.Data() << endl;
207  }
208  else if(jbin=="bin1"){
209  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
210  tmpLine.Replace(2*(bindex["bin2"]*nprocesses+1),1,"5");
211  tmpLine.Prepend("normqcd_bin1_2 lnU ");
212  file << tmpLine.Data() << endl;
213  }
214  else if(jbin=="bin2"){
215  tmpLine.Replace(2*(bindex[jbin]*nprocesses+2),1,"5");
216  tmpLine.Replace(2*(bindex["bin0"]*nprocesses+2),1,"5");
217  tmpLine.Replace(2*(bindex["bin1"]*nprocesses+2),1,"5");
218  tmpLine.Prepend("normtt_bin2_0_1 lnU ");
219  file << tmpLine.Data() << endl;
220  }
221  else if(jbin=="bin3"){
222  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
223  tmpLine.Prepend("normqcd_bin3 lnU ");
224  file << tmpLine.Data() << endl;
225  }
226  else if(jbin=="bin4"){
227  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
228  tmpLine.Replace(2*(bindex["bin5"]*nprocesses+1),1,"5");
229  tmpLine.Prepend("normqcd_bin4_5 lnU ");
230  file << tmpLine.Data() << endl;
231  }
232  else if(jbin=="bin5"){
233  tmpLine.Replace(2*(bindex[jbin]*nprocesses+2),1,"5");
234  tmpLine.Replace(2*(bindex["bin3"]*nprocesses+2),1,"5");
235  tmpLine.Replace(2*(bindex["bin4"]*nprocesses+2),1,"5");
236  tmpLine.Prepend("normtt_bin5_3_4 lnU ");
237  file << tmpLine.Data() << endl;
238  }
239  else if(jbin=="bin10"){
240  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
241  tmpLine.Replace(2*(bindex["bin12"]*nprocesses+1),1,"5");
242  tmpLine.Prepend("normqcd_bin10_12 lnU ");
243  file << tmpLine.Data() << endl;
244  }
245  else if(jbin=="bin11"){
246  tmpLine.Replace(2*(bindex[jbin]*nprocesses+2),1,"5");
247  tmpLine.Replace(2*(bindex["bin16"]*nprocesses+2),1,"5");
248  tmpLine.Prepend("normtt_bin11_16 lnU ");
249  file << tmpLine.Data() << endl;
250  }
251  else if(jbin=="bin12"){
252  tmpLine.Replace(2*(bindex[jbin]*nprocesses+2),1,"5");
253  tmpLine.Replace(2*(bindex["bin10"]*nprocesses+2),1,"5");
254  tmpLine.Prepend("normtt_bin12_10 lnU ");
255  file << tmpLine.Data() << endl;
256  }
257  else if(jbin=="bin13"){
258  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
259  tmpLine.Replace(2*(bindex["bin15"]*nprocesses+1),1,"5");
260  tmpLine.Prepend("normqcd_bin13_15 lnU ");
261  file << tmpLine.Data() << endl;
262  }
263  else if(jbin=="bin14"){
264  tmpLine.Replace(2*(bindex[jbin]*nprocesses+2),1,"5");
265  tmpLine.Replace(2*(bindex["bin17"]*nprocesses+2),1,"5");
266  tmpLine.Prepend("normtt_bin14_17 lnU ");
267  file << tmpLine.Data() << endl;
268  }
269  else if(jbin=="bin15"){
270  tmpLine.Replace(2*(bindex[jbin]*nprocesses+2),1,"5");
271  tmpLine.Replace(2*(bindex["bin13"]*nprocesses+2),1,"5");
272  tmpLine.Prepend("normtt_bin15_13 lnU ");
273  file << tmpLine.Data() << endl;
274  }
275  else if(jbin=="bin16"){
276  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
277  tmpLine.Replace(2*(bindex["bin11"]*nprocesses+1),1,"5");
278  tmpLine.Prepend("normqcd_bin16_11 lnU ");
279  file << tmpLine.Data() << endl;
280  }
281  else if(jbin=="bin17"){
282  tmpLine.Replace(2*(bindex[jbin]*nprocesses+1),1,"5");
283  tmpLine.Replace(2*(bindex["bin14"]*nprocesses+1),1,"5");
284  tmpLine.Prepend("normqcd_bin17_14 lnU ");
285  file << tmpLine.Data() << endl;
286  }
287  }
288 }
289 
290 void outputLognormalSystematics(std::ofstream &file)
291 {
292  // luminosity uncertainty is 2.7%
293  file << "lumi lnN ";
294  for(unsigned int ibin=0; ibin<nbins; ibin++) {
295  file << "1.027 - - 1.027 1.027 ";
296  }
297  file << std::endl;
298 
299 }
300 
301 void outputShapeSystematics(std::ofstream &file, const std::vector<std::string> shapeSysts)
302 {
303  for(unsigned int isyst=0; isyst<shapeSysts.size(); isyst++) {
304  file << shapeSysts.at(isyst) << " shape ";
305  if(shapeSysts.at(isyst).find("pdf")!=std::string::npos) {
306  // there are 100 NNPDF variations and so each needs to be scaled down by a factor 1/sqrt(100)
307  for(unsigned int index=0; index<nbins; index++) file << "0.1 0.1 0.1 0.1 0.1 ";
308  }
309  else {
310  for(unsigned int index=0; index<nbins*nprocesses; index++) file << 1.0 << " ";
311  }
312  file << "\n";
313  }
314 }
315 
316 // outputs MC statistical uncertainties
317 void outputMCStatisticsSyst(std::ofstream &file, const std::vector<std::string> &bins, const std::string & signalBinName)
318 {
319  // unsigned int nbins=bins.size();
320  const unsigned int maxB=4;
321 
322  // only signal, qcd, and ttbar have non-negligible MC statistics uncertainties
323  std::vector<std::string> samples = {signalBinName, "qcd", "ttbar"};
324  for(auto isample : samples) {
325  for(unsigned int ibin = 0; ibin<bins.size(); ibin++) {
326  for(unsigned int ibbin=1; ibbin<maxB+1; ibbin++) {
327  if(!hasEntry(isample, bins.at(ibin), ibbin)) continue;
328  file << "mcstat_" << isample << "_" << bins.at(ibin) << "_nb" << ibbin << " shape ";
329  for(unsigned int ientry = 0; ientry<bins.size(); ientry++) {
330  if(ientry == ibin ) {
331  if( isample.find("signal")!=std::string::npos) file << "1 - - - - ";
332  else if( isample == "qcd") file << "- 1 - - - ";
333  else if( isample == "ttbar" ) file << "- - 1 - - ";
334  else if( isample == "wjets" ) file << "- - - 1 - ";
335  else if( isample == "other" ) file << "- - - - 1 ";
336  }
337  else file << "- - - - - ";
338  }
339  file << "\n";
340  }
341  }
342  }
343 }
344 
345 // exclude by hand following bins that have no entries:
346 // signal_mcstat_signal_bin0_nb3
347 // signal_mcstat_signal_bin2_nb4
348 // signal_mcstat_signal_bin3_nb4
349 // signal_mcstat_signal_bin5_nb4
350 // ttbar_mcstat_ttbar_bin0_nb4
351 // ttbar_mcstat_ttbar_bin5_nb4
352 // qcd_mcstat_qcd_bin5_nb3
353 // qcd_mcstat_qcd_bin5_nb4
354 // should do this from the histograms themselves!
355 bool hasEntry(const std::string &sample, const std::string &bin, const int nB)
356 {
357  if(sample.find("signal_M1000")!=std::string::npos) {
358  if(bin=="bin2" && nB==4) return false;
359  if(bin=="bin3" && nB==1) return false;
360  if(bin=="bin3" && nB==3) return false;
361  if(bin=="bin3" && nB==4) return false;
362  }
363  if(sample.find("signal_M1100")!=std::string::npos) {
364  if(bin=="bin3" && nB==4) return false;
365  if(bin=="bin5" && nB==4) return false;
366  }
367  if(sample.find("signal_M1200")!=std::string::npos) {
368  if(bin=="bin0" && nB==4) return false;
369  if(bin=="bin2" && nB==4) return false;
370  if(bin=="bin3" && nB==4) return false;
371  if(bin=="bin5" && nB==4) return false;
372  }
373  if(sample.find("signal_M1300")!=std::string::npos) {
374  if(bin=="bin2" && nB==4) return false;
375  if(bin=="bin3" && nB==1) return false;
376  }
377  if(sample.find("signal_M1400")!=std::string::npos) {
378  if(bin=="bin0" && nB==4) return false;
379  }
380  if(sample=="ttbar") {
381  if(bin=="bin0" && nB==4) return false;
382  if(bin=="bin5" && nB==4) return false;
383  }
384  if(sample=="qcd") {
385  if(bin=="bin5" && nB==3) return false;
386  if(bin=="bin5" && nB==4) return false;
387  if(bin=="bin11" && nB==4) return false;
388  if(bin=="bin14" && nB==4) return false;
389  if(bin=="bin15" && nB==4) return false;
390  }
391  return true;
392 }
STL namespace.
void outputShapeSystematics(std::ofstream &file, const std::vector< std::string > shapeSysts)
void outputLognormalSystematics(std::ofstream &file)
void outputMCStatisticsSyst(std::ofstream &file, const std::vector< std::string > &bins, const std::string &signalBinName)
bool hasEntry(const std::string &sample, const std::string &bin, const int nB)
void outputNormSharing(std::ofstream &file, const std::vector< std::string > &bins)
tuple file
Definition: parse_card.py:238
int main(int argc, char *argv[])