ra4_stats  0341147a0dc35f80f4e12c6003afb76a38e2ed6e
test_abcd.cxx
Go to the documentation of this file.
1 #include "test_abcd.hpp"
2 
3 #include <cstdlib>
4 #include <cmath>
5 
6 #include <iostream>
7 #include <iomanip>
8 #include <sstream>
9 #include <limits>
10 #include <string>
11 #include <random>
12 #include <functional>
13 #include <array>
14 #include <algorithm>
15 
16 #include "TMath.h"
17 #include "TH1D.h"
18 #include "TF1.h"
19 #include "TCanvas.h"
20 #include "TLegend.h"
21 #include "TLine.h"
22 
23 using namespace std;
24 
25 namespace{
26  size_t num_bkg_toys = 1e7;
27  size_t num_sig_toys = 1e5;
28 
29  double inf = numeric_limits<double>::infinity();
30 }
31 
32 int main(int argc, char *argv[]){
33  if(argc != 9){
34  cerr << "Must supply 8 rates as arguments: bkg A, bkg B, bkg C, bkg D, sig A, sig B, sig C, sig D" << endl;
35  return EXIT_FAILURE;
36  }
37 
38  //Get a string to record input args
39  string params = GetParamString(argc, argv);
40 
41  //Read background parameters from command line
42  double bkg_a = atof(argv[1]);
43  double bkg_b = atof(argv[2]);
44  double bkg_c = atof(argv[3]);
45  double bkg_d = atof(argv[4]);
46 
47  //Fix background to close under ABCD
48  GetPredictions(bkg_a, bkg_b, bkg_c, bkg_d,
49  bkg_a, bkg_b, bkg_c, bkg_d);
50 
51  //Read background parameters from command line
52  double sig_a = atof(argv[5]);
53  double sig_b = atof(argv[6]);
54  double sig_c = atof(argv[7]);
55  double sig_d = atof(argv[8]);
56 
57  //Compute S+B yields for printing
58  double tot_a = bkg_a + sig_a;
59  double tot_b = bkg_b + sig_b;
60  double tot_c = bkg_c + sig_c;
61  double tot_d = bkg_d + sig_d;
62 
63  //Get ABCD predictions for printing
64  double pred_a, pred_b, pred_c, pred_d;
65  GetPredictions(tot_a, tot_b, tot_c, tot_d,
66  pred_a, pred_b, pred_c, pred_d);
67 
68  //Print rates
69  PrintRates(cout, "Background", bkg_a, bkg_b, bkg_c, bkg_d);
70  PrintRates(cout, " Signal", sig_a, sig_b, sig_c, sig_d);
71  PrintRates(cout, " Total", tot_a, tot_b, tot_c, tot_d);
72  PrintRates(cout, " ABCD", pred_a, pred_b, pred_c, pred_d);
73 
74  //Compute -2 ln(l_null/l_alt) and significance for Asimov data set
75  double q = GetTestStatistic(tot_a, tot_b, tot_c, tot_d);
76  double z = QToZ(q);
77  cout << "Z-score: " << setprecision(6) << z << endl;
78 
79  //PRNG for toys
80  mt19937 prng;
81  InitializePRNG(prng);
82 
83  //Generate toys
84  vector<double> b_qs = SampleTestStatistic(prng, num_bkg_toys, bkg_a, bkg_b, bkg_c, bkg_d);
85  vector<double> sb_qs = SampleTestStatistic(prng, num_sig_toys, tot_a, tot_b, tot_c, tot_d);
86 
87  //Make plots
88  PlotQDistributions(params, b_qs, sb_qs);
89  PlotZDistributions(params, b_qs, sb_qs, z);
90 }
91 
92 string GetParamString(int argc, char *argv[]){
93  ostringstream oss;
94  for(int i = 1; i < argc - 1; ++i){
95  oss << argv[i] << "_";
96  }
97  oss << argv[argc-1];
98 
99  return oss.str();
100 }
101 
102 double GetTestStatistic(double a, double b, double c, double d){
103  //Get ABCD predictions with signal added
104  double pred_a, pred_b, pred_c, pred_d;
105  GetPredictions(a, b, c, d,
106  pred_a, pred_b, pred_c, pred_d);
107 
108  //Compute log-likelihoods
109  double null_ll = BinLogLikelihood(a, pred_a)
110  + BinLogLikelihood(b, pred_b)
111  + BinLogLikelihood(c, pred_c)
112  + BinLogLikelihood(d, pred_d);
113  double alt_ll = BinLogLikelihood(a, a)
114  + BinLogLikelihood(b, b)
115  + BinLogLikelihood(c, c)
116  + BinLogLikelihood(d, d);
117 
118  //Compute the likelihood ratio test statistic
119  double q = 2.*(alt_ll-null_ll);
120  return q >= 0. ? q : 0.;
121 }
122 
123 double QToP(double q){
124  return erfc(sqrt(0.5*q));
125 }
126 
127 double QToZ(double q){
128  return sqrt(q);
129 }
130 
131 double ZToQ(double z){
132  return z*z;
133 }
134 
135 double ZToP(double z){
136  return erfc(z/sqrt(2.));
137 }
138 
139 double PToZ(double p){
140  double cp = 1.-0.5*p;
141  return cp <= 0. ? -inf
142  : cp >= 1. ? inf
143  : TMath::NormQuantile(cp);
144 }
145 
146 double PToQ(double p){
147  return ZToQ(PToZ(p));
148 }
149 
150 vector<double> GetSignificances(const vector<double> &b_qs, const vector<double> &sb_qs){
151  vector<double> out(sb_qs.size());
152  for(size_t i = 0; i < out.size(); ++i){
153  out.at(i) = PToZ(FindFraction(b_qs, sb_qs.at(i)));
154  }
155  return out;
156 }
157 
158 vector<double> GetSignificances(const vector<double> &sb_qs){
159  vector<double> out(sb_qs.size());
160  for(size_t i = 0; i < out.size(); ++i){
161  out.at(i) = QToZ(sb_qs.at(i));
162  }
163  return out;
164 }
165 
166 double FindFraction(const vector<double> &qs, double q){
167  return static_cast<double>(distance(lower_bound(qs.begin(), qs.end(), q), qs.end()))/qs.size();
168 }
169 
170 vector<double> SampleTestStatistic(mt19937 & prng, size_t n, double a, double b, double c, double d){
171  vector<double> out(n);
172  poisson_distribution<unsigned> pa(a), pb(b), pc(c), pd(d);
173  for(size_t i = 0; i < n; ++i){
174  out.at(i) = GetTestStatistic(a > 0. ? pa(prng) : 0.,
175  b > 0. ? pb(prng) : 0.,
176  c > 0. ? pc(prng) : 0.,
177  d > 0. ? pd(prng) : 0.);
178  }
179  sort(out.begin(), out.end());
180  return out;
181 }
182 
183 void GetPredictions(double a_in, double b_in, double c_in, double d_in,
184  double &a_out, double &b_out, double &c_out, double &d_out){
185  double total = a_in + b_in + c_in + d_in;
186  a_out = total ? (a_in+b_in)*(a_in+c_in)/total : 0.;
187  b_out = total ? (a_in+b_in)*(b_in+d_in)/total : 0.;
188  c_out = total ? (a_in+c_in)*(c_in+d_in)/total : 0.;
189  d_out = total ? (b_in+d_in)*(c_in+d_in)/total : 0.;
190 }
191 
192 double BinLogLikelihood(double obs, double pred){
193  return (pred > 0.) ? (obs*log(pred) - pred) : (obs > 0. ? -inf : 0.);
194 }
195 
196 void PrintRates(ostream &out, const string &name,
197  double a, double b, double c, double d){
198  out << fixed << showpoint << setprecision(2);
199  out
200  << name << ": "
201  << ' ' << setw(8) << a
202  << ' ' << setw(8) << b
203  << ' ' << setw(8) << c
204  << ' ' << setw(8) << d
205  << endl;
206 }
207 
208 void InitializePRNG(mt19937 &prng){
209  array<int, mt19937::state_size> vals;
210  random_device rd;
211  generate(vals.begin(), vals.end(), ref(rd));
212  seed_seq ss(vals.begin(), vals.end());
213  prng.seed(ss);
214 }
215 
216 void PlotQDistributions(const string &params,
217  const vector<double> &b_qs,
218  const vector<double> &sb_qs){
219  TH1D hb("hb", (params+";-2*ln(L_{null}/L_{alt});PDF").c_str(), 100, 0., 30.);
220  hb.SetLineColor(kGreen+2);
221  hb.SetLineStyle(1);
222  hb.SetLineWidth(4);
223  hb.SetStats(false);
224  TH1D hsb("hsb", (params+";-2*ln(L_{null}/L_{alt});PDF").c_str(), 100, 0., 30.);
225  hsb.SetLineColor(kRed+2);
226  hsb.SetLineStyle(1);
227  hsb.SetLineWidth(4);
228  hsb.SetStats(false);
229 
230  for(const double &q: b_qs) hb.Fill(q);
231  for(const double &q: sb_qs) hsb.Fill(q);
232 
235 
236  TF1 pdf("pdf", "ROOT::Math::chisquared_pdf(x,1.)", 0., 30.);
237  pdf.SetTitle((params+";-2*ln(L_{null}/L_{alt});PDF").c_str());
238  pdf.SetNpx(1000);
239  pdf.SetLineColor(kBlack);
240  pdf.SetLineStyle(1);
241  pdf.SetLineWidth(4);
242  pdf.SetMaximum(1.);
243  pdf.SetMinimum(1e-8);
244 
245  TLegend l(0.1, 0.1, 0.4, 0.4);
246  l.AddEntry(&hsb, "S+B", "le");
247  l.AddEntry(&hb, "B-only", "le");
248  l.AddEntry(&pdf, "#chi^{2}(1)", "l");
249 
250  TCanvas c;
251  c.SetLogy();
252  pdf.Draw();
253  hb.Draw("e0 same");
254  hsb.Draw("e0 same");
255  l.Draw("same");
256  c.Print(("q_"+params+".pdf").c_str());
257 }
258 
259 void PlotZDistributions(const string &params,
260  const vector<double> &b_qs,
261  const vector<double> &sb_qs,
262  double z){
263  TH1D htoy("htoy", (params+";Significance;PDF").c_str(), 100, 0., 10.);
264  htoy.SetLineColor(kRed+2);
265  htoy.SetLineStyle(1);
266  htoy.SetLineWidth(4);
267  htoy.SetStats(false);
268  TH1D hasympt("hasympt", (params+";Significance;PDF").c_str(), 100, 0., 10.);
269  hasympt.SetLineColor(kBlack);
270  hasympt.SetLineStyle(2);
271  hasympt.SetLineWidth(4);
272  hasympt.SetStats(false);
273  TH1D hnull("hnull", (params+";Significance;PDF").c_str(), 100, 0., 10.);
274  hnull.SetLineColor(kGreen+2);
275  hnull.SetLineStyle(1);
276  hnull.SetLineWidth(4);
277  hnull.SetStats(false);
278  TF1 pdf("pdf", "2*TMath::Gaus(x,0,1,1)", 0., 10.);
279  pdf.SetNpx(1000);
280  pdf.SetTitle((params+";Significance;PDF").c_str());
281  pdf.SetLineColor(kBlack);
282  pdf.SetLineStyle(1);
283  pdf.SetLineWidth(4);
284 
285  vector<double> toy_zs = GetSignificances(b_qs, sb_qs);
286 
287  for(const double &zi: toy_zs) htoy.Fill(zi);
288  for(const double &q: sb_qs) hasympt.Fill(QToZ(q));
289  for(const double &q: b_qs) hnull.Fill(QToZ(q));
290 
291  NormalizeWithOverflow(htoy);
292  NormalizeWithOverflow(hasympt);
293  NormalizeWithOverflow(hnull);
294 
295  pdf.SetMinimum(0.);
296  pdf.SetMaximum(1.);
297 
298  TLine line(z, 0., z, 1.);
299  line.SetLineColor(kRed+2);
300  line.SetLineStyle(3);
301  line.SetLineWidth(2);
302 
303  TLegend leg(0.6, 0.6, 0.9, 0.9);
304  leg.AddEntry(&htoy, "S+B (toys)", "l");
305  leg.AddEntry(&hnull, "B-only (toys)", "l");
306  leg.AddEntry(&hasympt, "S+B (asympt)", "l");
307  leg.AddEntry(&pdf, "B-only (asympt)", "l");
308 
309  TCanvas c;
310  pdf.Draw();
311  hasympt.Draw("hist same");
312  hnull.Draw("hist same");
313  htoy.Draw("hist same");
314  leg.Draw("same");
315  if(z >= 0.) line.Draw("same");
316  c.Print(("z_"+params+".pdf").c_str());
317 }
318 
319 void NormalizeWithOverflow(TH1D &h){
320  int nbins = h.GetNbinsX();
321  double entries = h.GetEntries();
322  h.Sumw2();
323  h.SetBinContent(1, h.GetBinContent(0)+h.GetBinContent(1));
324  h.SetBinError(1, hypot(h.GetBinError(0), h.GetBinError(1)));
325  h.SetBinContent(nbins, h.GetBinContent(nbins)+h.GetBinContent(nbins+1));
326  h.SetBinError(nbins, hypot(h.GetBinError(nbins), h.GetBinError(nbins+1)));
327 
328  h.SetBinContent(0, 0.);
329  h.SetBinError(0, 0.);
330  h.SetBinContent(nbins+1, 0.);
331  h.SetBinError(nbins+1, 0.);
332 
333  h.Scale(1./h.Integral("width"));
334  h.SetEntries(entries);
335 }
336 
337 double RealMax(const TH1D &h){
338  return h.GetBinContent(h.GetMaximumBin());
339 }
double PToZ(double p)
Definition: test_abcd.cxx:139
double RealMax(const TH1D &h)
Definition: test_abcd.cxx:337
double ZToQ(double z)
Definition: test_abcd.cxx:131
vector< double > GetSignificances(const vector< double > &b_qs, const vector< double > &sb_qs)
Definition: test_abcd.cxx:150
STL namespace.
double FindFraction(const vector< double > &qs, double q)
Definition: test_abcd.cxx:166
double GetTestStatistic(double a, double b, double c, double d)
Definition: test_abcd.cxx:102
double PToQ(double p)
Definition: test_abcd.cxx:146
int main(int argc, char *argv[])
Definition: test_abcd.cxx:32
double BinLogLikelihood(double obs, double pred)
Definition: test_abcd.cxx:192
void PrintRates(ostream &out, const string &name, double a, double b, double c, double d)
Definition: test_abcd.cxx:196
double ZToP(double z)
Definition: test_abcd.cxx:135
void InitializePRNG(mt19937 &prng)
Definition: test_abcd.cxx:208
void PlotZDistributions(const string &params, const vector< double > &b_qs, const vector< double > &sb_qs, double z)
Definition: test_abcd.cxx:259
string GetParamString(int argc, char *argv[])
Definition: test_abcd.cxx:92
void GetPredictions(double a_in, double b_in, double c_in, double d_in, double &a_out, double &b_out, double &c_out, double &d_out)
Definition: test_abcd.cxx:183
double QToZ(double q)
Definition: test_abcd.cxx:127
double QToP(double q)
Definition: test_abcd.cxx:123
void NormalizeWithOverflow(TH1D &h)
Definition: test_abcd.cxx:319
void PlotQDistributions(const string &params, const vector< double > &b_qs, const vector< double > &sb_qs)
Definition: test_abcd.cxx:216
vector< double > SampleTestStatistic(mt19937 &prng, size_t n, double a, double b, double c, double d)
Definition: test_abcd.cxx:170