ra4_stats  0341147a0dc35f80f4e12c6003afb76a38e2ed6e
run_combine.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 from __future__ import print_function
4 
5 import argparse
6 import os
7 import tempfile
8 import shutil
9 import subprocess
10 import sys
11 import math
12 import scipy.stats
13 
14 import ROOT
15 
16 import utils
17 
18 ROOT.PyConfig.IgnoreCommandLineOptions = True
19 
20 def run_obs_signif(output_path, overwrite, log_path):
21  with utils.ROOTFile(output_path, "read") as rfile:
22  if not overwrite and rfile.Get("sig_obs"):
23  print(" Kept observed significance: {:8.3f}".format(rfile.Get("sig_obs")[0]))
24  return
25 
26  cwd = os.getcwd()
27  work_dir = tempfile.mkdtemp()
28  link_path = os.path.join(work_dir, os.path.basename(output_path))
29  os.symlink(output_path, link_path)
30 
31  os.chdir(work_dir)
32  command = ["combine","-M","ProfileLikelihood","--significance","--uncapped=1","--rMin=-10.","-d",link_path]
33  with open(log_path, "a") as dn:
34  subprocess.check_call(command, stdout=dn, stderr=dn)
35  os.chdir(cwd)
36 
37  result_path = os.path.join(work_dir, "higgsCombineTest.ProfileLikelihood.mH120.root")
38  signif = None
39  with utils.ROOTFile(result_path, "read") as result, utils.ROOTFile(output_path, "update") as rfile:
40  tree = result.Get("limit")
41  tree.GetEntry(0)
42  signif = tree.limit
43  signif_vec = ROOT.TVectorD(1)
44  signif_vec[0] = signif
45  rfile.cd()
46  signif_vec.Write("sig_obs",ROOT.TObject.kWriteDelete)
47  shutil.rmtree(work_dir)
48  print("Saved observed significance: {:8.3f}".format(signif))
49 
50 def run_exp_signif(output_path, overwrite, log_path):
51  with utils.ROOTFile(output_path, "read") as rfile:
52  if not overwrite and rfile.Get("sig_exp"):
53  print(" Kept expected significance: {:8.3f}".format(rfile.Get("sig_exp")[0]))
54  return
55 
56  cwd = os.getcwd()
57  work_dir = tempfile.mkdtemp()
58  link_path = os.path.join(work_dir, os.path.basename(output_path))
59  os.symlink(output_path, link_path)
60 
61  os.chdir(work_dir)
62  command = ["combine","-M","ProfileLikelihood","--significance","--uncapped=1","--rMin=-10.","--expectSignal=1","--toysFreq","-t","-1","-d",link_path]
63  with open(log_path, "a") as dn:
64  subprocess.check_call(command, stdout=dn, stderr=dn)
65  os.chdir(cwd)
66 
67  result_path = os.path.join(work_dir, "higgsCombineTest.ProfileLikelihood.mH120.root")
68  signif = None
69  with utils.ROOTFile(result_path, "read") as result, utils.ROOTFile(output_path, "update") as rfile:
70  tree = result.Get("limit")
71  tree.GetEntry(0)
72  signif = tree.limit
73  signif_vec = ROOT.TVectorD(1)
74  signif_vec[0] = signif
75  rfile.cd()
76  signif_vec.Write("sig_exp",ROOT.TObject.kWriteDelete)
77  shutil.rmtree(work_dir)
78  print("Saved expected significance: {:8.3f}".format(signif))
79 
80 def run_limit(output_path, overwrite, log_path):
81  with utils.ROOTFile(output_path, "read") as rfile:
82  if not overwrite and rfile.Get("lim_obs") and rfile.Get("lim_exp16") and rfile.Get("lim_exp50") and rfile.Get("lim_exp84"):
83  print(" Kept observed limit: {:8.3f}".format(rfile.Get("lim_obs")[0]))
84  print(" Kept expected limit (16%): {:8.3f}".format(rfile.Get("lim_exp16")[0]))
85  print(" Kept expected limit (50%): {:8.3f}".format(rfile.Get("lim_exp50")[0]))
86  print(" Kept expected limit (84%): {:8.3f}".format(rfile.Get("lim_exp84")[0]))
87  return
88 
89  cwd = os.getcwd()
90  work_dir = tempfile.mkdtemp()
91  link_path = os.path.join(work_dir, os.path.basename(output_path))
92  os.symlink(output_path, link_path)
93 
94  os.chdir(work_dir)
95  command = ["combine","-M","Asymptotic","-d",link_path]
96  with open(log_path, "a") as dn:
97  subprocess.check_call(command, stdout=dn, stderr=dn)
98  os.chdir(cwd)
99 
100  result_path = os.path.join(work_dir, "higgsCombineTest.Asymptotic.mH120.root")
101  obs = None
102  exp16 = None
103  exp50 = None
104  exp84 = None
105  with utils.ROOTFile(result_path, "read") as result, utils.ROOTFile(output_path, "update") as rfile:
106  tree = result.Get("limit")
107  for entry in tree:
108  if entry.quantileExpected < 0.:
109  obs = entry.limit
110  elif entry.quantileExpected > 0.15 and entry.quantileExpected < 0.17:
111  exp16 = entry.limit
112  elif entry.quantileExpected > 0.49 and entry.quantileExpected < 0.51:
113  exp50 = entry.limit
114  elif entry.quantileExpected > 0.83 and entry.quantileExpected < 0.85:
115  exp84 = entry.limit
116  obs_vec = ROOT.TVectorD(1)
117  exp16_vec = ROOT.TVectorD(1)
118  exp50_vec = ROOT.TVectorD(1)
119  exp84_vec = ROOT.TVectorD(1)
120  obs_vec[0] = obs
121  exp16_vec[0] = exp16
122  exp50_vec[0] = exp50
123  exp84_vec[0] = exp84
124  rfile.cd()
125  obs_vec.Write("lim_obs",ROOT.TObject.kWriteDelete)
126  exp16_vec.Write("lim_exp16",ROOT.TObject.kWriteDelete)
127  exp50_vec.Write("lim_exp50",ROOT.TObject.kWriteDelete)
128  exp84_vec.Write("lim_exp84",ROOT.TObject.kWriteDelete)
129  shutil.rmtree(work_dir)
130  print("Saved observed limit: {:8.3f}".format(obs))
131  print("Saved expected limit (16%): {:8.3f}".format(exp16))
132  print("Saved expected limit (50%): {:8.3f}".format(exp50))
133  print("Saved expected limit (84%): {:8.3f}".format(exp84))
134 
135 def run_fit(output_path, overwrite, log_path):
136  with utils.ROOTFile(output_path, "read") as rfile:
137  if not overwrite and rfile.Get("fit_b") and rfile.Get("fit_s"):
138  print(" Kept fit results")
139  return
140 
141  cwd = os.getcwd()
142  work_dir = tempfile.mkdtemp()
143  link_path = os.path.join(work_dir, os.path.basename(output_path))
144  os.symlink(output_path, link_path)
145 
146  os.chdir(work_dir)
147  command = ["combine","-M","MaxLikelihoodFit","--forceRecreateNLL","--saveWorkspace","--saveWithUncertainties","--minos=all","-w","w","--dataset","data_obs","-d",link_path]
148  with open(log_path, "a") as dn:
149  subprocess.check_call(command, stdout=dn, stderr=dn)
150  os.chdir(cwd)
151 
152  result_path = os.path.join(work_dir, "mlfit.root")
153  with utils.ROOTFile(result_path, "read") as result, utils.ROOTFile(output_path, "update") as rfile:
154  fit_b = result.Get("fit_b")
155  rfile.cd()
156  fit_b.Write("fit_b",ROOT.TObject.kWriteDelete)
157  fit_s = result.Get("fit_s")
158  rfile.cd()
159  fit_s.Write("fit_s",ROOT.TObject.kWriteDelete)
160  shutil.rmtree(work_dir)
161  print("Saved fit results")
162 
163 def fix_vars(output_path):
164  with utils.ROOTFile(output_path, "read") as rfile:
165  if not rfile.Get("fit_b"):
166  return
167 
168  with utils.ROOTFile(output_path, "update") as rfile:
169  w = rfile.Get("w")
170  f = rfile.Get("fit_b")
171  if not rfile.Get("w_orig"):
172  w.Write("w_orig")
173 
174  pars = f.floatParsFinal()
175  for i in range(pars.getSize()):
176  p = pars.at(i)
177  v = w.var(p.GetName())
178  v.setVal(p.getVal())
179  v.setError(p.getError())
180  w.Write("w", ROOT.TObject.kWriteDelete)
181  print("Set variables to background-only best fit values")
182 
183 def goodness_of_fit(output_path, overwrite, ndof):
184  sat_nll = 0.
185  fit_nll = 0.
186  with utils.ROOTFile(output_path, "read") as rfile:
187  pval = rfile.Get("pval")
188  chi_sq = rfile.Get("chi2")
189  saved_ndof = rfile.Get("ndof")
190  if not overwrite and (pval or chi_sq or saved_ndof):
191  if pval:
192  print(" Kept p-value: {:8.3f}".format(pval[0]))
193  if chi_sq:
194  print(" Kept chi^2: {:8.3f}".format(chi_sq[0]))
195  if saved_ndof:
196  print(" Kept N.D.o.F.: {:4d}".format(int(saved_ndof[0])))
197  return
198 
199  if not rfile.Get("w_orig"):
200  print('Skipping goodness of fit. Run with "--full_fit --overwrite" first to enable.')
201  return
202 
203  w = rfile.Get("w")
204  pdf = w.pdf("model_b")
205  fit_nll = -pdf.getLogVal()
206 
207  sat_nll = 0.
208  variables = w.allVars()
209  iterator = variables.createIterator()
210  variable = iterator.Next()
211  while variable:
212  name = variable.GetName()
213  if "nobs_BLK_" in name or "nobsmc_BLK_" in name:
214  val = variable.getVal()
215  if val > 0:
216  sat_nll -= val*math.log(val) - val - math.lgamma(val+1.)
217  variable = iterator.Next()
218 
219  chi_sq = 2.*(fit_nll-sat_nll)
220  pval = scipy.stats.chi2.sf(chi_sq,ndof)
221  chi_sq_vec = ROOT.TVectorD(1)
222  ndof_vec = ROOT.TVectorD(1)
223  pval_vec = ROOT.TVectorD(1)
224  chi_sq_vec[0] = chi_sq
225  ndof_vec[0] = float(ndof)
226  pval_vec[0] = pval
227 
228  with utils.ROOTFile(output_path, "update") as rfile:
229  rfile.cd()
230  chi_sq_vec.Write("chi2",ROOT.TObject.kWriteDelete)
231  ndof_vec.Write("ndof",ROOT.TObject.kWriteDelete)
232  pval_vec.Write("pval",ROOT.TObject.kWriteDelete)
233 
234  print("Saved p-value: {:8.3f}".format(pval))
235  print("Saved chi^2: {:8.3f}".format(chi_sq))
236  print("Saved N.D.o.F.: {:4d}".format(int(ndof)))
237 
238 def run_combine(workspace_path, output_path, do_full_fit, overwrite, log_path, ndof):
239  workspace_path = utils.full_path(workspace_path)
240  log_path = utils.full_path(log_path)
241 
242  with open(log_path, "w") as log_file:
243  print("Workspace file: {}".format(workspace_path),file=log_file)
244 
245  if output_path is None:
246  output_path = workspace_path
247  else:
248  output_path = utils.full_path(output_path)
249  shutil.copy2(workspace_path, output_path)
250 
251  utils.cmsenv("~/cmssw/CMSSW_7_4_14/src")
252  if do_full_fit:
253  run_fit(output_path, overwrite, log_path)
254  if overwrite:
255  fix_vars(output_path)
256  goodness_of_fit(output_path, overwrite, ndof)
257  run_obs_signif(output_path, overwrite, log_path)
258  run_exp_signif(output_path, overwrite, log_path)
259  run_limit(output_path, overwrite, log_path)
260 
261 if __name__ == "__main__":
262  parser = argparse.ArgumentParser(description="Runs combine to find significance, limits, and best fit parameter values",
263  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
264  parser.add_argument("workspace_file", help="File containing RooWorkspace")
265  parser.add_argument("--full_fit", action="store_true", help="Do full (slow) maximum likelihood fit, storing results. Also sets workspace parameters to best fit values if --overwrite option used.")
266  parser.add_argument("--output_file", default=None, help="Store results in specified file instead of caching in input file")
267  parser.add_argument("--overwrite", action="store_true", help="Overwrite/update already computed results")
268  parser.add_argument("--log_file", default=os.devnull, help="Log file in which to write combine output (not yet working)")
269  parser.add_argument("--ndof", type=int, default=18, help="Number of degrees of freedom for goodness of fit test")
270 
271  args = parser.parse_args()
272 
273  run_combine(args.workspace_file, args.output_file, args.full_fit, args.overwrite, args.log_file, args.ndof)
def run_obs_signif(output_path, overwrite, log_path)
Definition: run_combine.py:20
def run_limit(output_path, overwrite, log_path)
Definition: run_combine.py:80
def cmsenv(path)
Definition: utils.py:10
def full_path(path)
Definition: utils.py:7
def fix_vars(output_path)
Definition: run_combine.py:163
def run_fit(output_path, overwrite, log_path)
Definition: run_combine.py:135
def run_exp_signif(output_path, overwrite, log_path)
Definition: run_combine.py:50
def run_combine(workspace_path, output_path, do_full_fit, overwrite, log_path, ndof)
Definition: run_combine.py:238
def goodness_of_fit(output_path, overwrite, ndof)
Definition: run_combine.py:183