ra4_macros  bede988c286599a3a84b77a4d788ac0a971e89f9
parse_card.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 import optparse
4 from math import sqrt
5 
def isfloat(value):
    """Return True when ``float(value)`` succeeds, False on ValueError.

    Only ValueError is treated as "not a float"; any other exception
    raised by ``float`` (e.g. TypeError for None) propagates.
    """
    try:
        float(value)
    except ValueError:
        return False
    else:
        return True
12 
def resize(na, nb, init):
    """Build an ``na`` x ``nb`` list of lists with every cell set to ``init``.

    Each row is a separate list object, so writing to one row never
    affects another.
    """
    return [[init] * nb for _ in range(0, na)]
18 
def GetBinNames(file, lines, num_bins):
    """Return one display label per bin, chosen from the card's file name.

    The single character following "_method_" in ``file.name`` selects the
    labelling scheme:
      * method 0, 1 or 2 with exactly 4 bins -> MET/Nj labels,
      * method 3 with exactly 6 bins         -> MET/Nj/Nb labels,
      * anything else (no marker, non-digit, unexpected bin count)
        -> the bin index as a string.

    ``lines`` is accepted but not used.
    """
    marker = "_method_"
    path = file.name

    # Extract the method digit, if there is one right after the marker.
    method = None
    marker_pos = path.find(marker)
    if marker_pos != -1:
        digit = path[marker_pos + len(marker):marker_pos + len(marker) + 1]
        if digit.isdigit():
            method = int(digit)

    if method in (0, 1, 2) and num_bins == 4:
        return [
            "low $E_T^{\\text{miss}}$, low $N_j$",
            "low $E_T^{\\text{miss}}$, high $N_j$",
            "high $E_T^{\\text{miss}}$, low $N_j$",
            "high $E_T^{\\text{miss}}$, high $N_j$",
        ]

    if method == 3 and num_bins == 6:
        return [
            "low $E_T^{\\text{miss}}$, low $N_j$, low $N_b$",
            "low $E_T^{\\text{miss}}$, low $N_j$, high $N_b$",
            "low $E_T^{\\text{miss}}$, high $N_j$, low $N_b$",
            "low $E_T^{\\text{miss}}$, high $N_j$, high $N_b$",
            "high $E_T^{\\text{miss}}$, low $N_j$",
            "high $E_T^{\\text{miss}}$, high $N_j$",
        ]

    # Fallback: plain numeric labels.
    return [str(index) for index in range(0, num_bins)]
44 
def GetProcessNames(file, lines, num_procs):
    """Return display names for the first ``num_procs`` processes of the card.

    Names come from the first line whose first token is "process"; tokens
    1..num_procs of that line are used.  Two names are rewritten for
    display:
      * "sig"   -> a signal label derived from ``file.name``
                   ("T1tttt(1500,100)", "T1tttt(1200,800)", "T1tttt",
                   or "Signal" when the file name has no "T1tttt"),
      * "ttbar" -> the LaTeX string "$t\\overline{t}$".

    Entries stay None when no "process" line is found.  An IndexError is
    raised if the "process" line has fewer than ``num_procs`` + 1 tokens.
    """
    card_name = file.name
    if "T1tttt_1500_100" in card_name:
        sig_name = "T1tttt(1500,100)"
    elif "T1tttt_1200_800" in card_name:
        sig_name = "T1tttt(1200,800)"
    elif "T1tttt" in card_name:
        sig_name = "T1tttt"
    else:
        sig_name = "Signal"

    display = {"sig": sig_name, "ttbar": "$t\\overline{t}$"}

    proc_names = [None] * num_procs
    for line in lines:
        words = line.split()
        # Blank lines have no tokens; indexing words[0] there would raise.
        if not words or words[0] != "process":
            continue
        for proc in range(0, num_procs):
            raw_name = words[proc + 1]
            proc_names[proc] = display.get(raw_name, raw_name)
        # Stop at the first "process" line so that a later "process" row
        # (presumably the numeric process-ID row of the card) cannot
        # overwrite the names collected here.
        break

    return proc_names
69 
def GetLnn(lines, num_procs, num_bins):
    """Collect log-normal ("lnN") uncertainties from the card lines.

    Only lines whose second token is "lnN" are considered.  For each bin
    the reference token is the one at index ``num_procs*bin + 3``
    (presumably the first non-signal process of that bin -- verify
    against the card layout).  Bins whose reference token is not a float
    are skipped for that line.

    * If the line's first token contains "mc___", the reference value is
      accumulated into ``lnn_mc[bin]``.
    * Otherwise, if the tokens of processes 1..num_procs-1 in that bin
      are all textually equal to the reference, it is accumulated into
      ``lnn_match[bin]``; if not, every float-valued token is accumulated
      into ``lnn_diff[proc][bin]`` for its own process.

    Accumulation is of ``(value - 1)**2``; the square root of each sum is
    returned.  Process 0 is never filled in ``lnn_diff``.

    Returns the tuple ``(lnn_match, lnn_diff, lnn_mc)``.
    """
    lnn_match = [0.] * num_bins
    lnn_diff = resize(num_procs, num_bins, 0.)
    lnn_mc = [0.] * num_bins

    for line in lines:
        tokens = line.split()
        if len(tokens) < 2 or tokens[1] != "lnN":
            continue

        is_mc_line = "mc___" in tokens[0]
        for ibin in range(0, num_bins):
            base = num_procs * ibin + 2
            reference = tokens[base + 1]
            if not isfloat(reference):
                continue

            if is_mc_line:
                lnn_mc[ibin] += (float(reference) - 1.) ** 2
                continue

            # Tokens for processes 1..num_procs-1 of this bin (indexing,
            # not slicing, so a short line still raises IndexError).
            bkg_tokens = [tokens[base + proc] for proc in range(1, num_procs)]
            if all(tok == reference for tok in bkg_tokens):
                lnn_match[ibin] += (float(reference) - 1.) ** 2
            else:
                for proc, tok in enumerate(bkg_tokens, 1):
                    if isfloat(tok):
                        lnn_diff[proc][ibin] += (float(tok) - 1.) ** 2

    # Turn the accumulated sums of squares into quadrature sums.
    for ibin in range(0, num_bins):
        lnn_mc[ibin] = sqrt(lnn_mc[ibin])
        lnn_match[ibin] = sqrt(lnn_match[ibin])
        for proc in range(0, num_procs):
            lnn_diff[proc][ibin] = sqrt(lnn_diff[proc][ibin])

    return (lnn_match, lnn_diff, lnn_mc)
105 
def GetGamma(lines, num_procs, num_bins):
    """Read the "gmN" lines and return ``(gmn_raw, gmn_wght)``.

    A line is used when it has at least three tokens, its second token is
    "gmN" and its third token parses as a float.  For every (process, bin)
    pair the token at index ``num_procs*bin + proc + 3`` is inspected; if
    it is a float, the line's third token becomes ``gmn_raw[proc][bin]``
    and the inspected token becomes ``gmn_wght[proc][bin]``.

    Cells never set by any line remain None.
    """
    gmn_raw = resize(num_procs, num_bins, None)
    gmn_wght = resize(num_procs, num_bins, None)

    for line in lines:
        tokens = line.split()
        if len(tokens) < 3 or tokens[1] != "gmN" or not isfloat(tokens[2]):
            continue

        raw_count = tokens[2]
        for proc in range(0, num_procs):
            for ibin in range(0, num_bins):
                weight = tokens[num_procs * ibin + proc + 3]
                if isfloat(weight):
                    gmn_raw[proc][ibin] = float(raw_count)
                    gmn_wght[proc][ibin] = float(weight)

    return (gmn_raw, gmn_wght)
119 
def GetBkgGamma(gmn_raw, gmn_wght, num_procs, num_bins):
    """Combine processes 1..num_procs-1 into one effective (raw, weight) per bin.

    For each bin, with ``count = sum(raw*wght)`` and
    ``ss = sum(raw*wght**2)`` over processes 1..num_procs-1, the result is
    ``out_raw = count**2/ss`` and ``out_wght = ss/count``, so that
    ``out_raw*out_wght == count``.

    Process 0 is excluded.  A ZeroDivisionError is raised when ``ss`` or
    ``count`` is zero for a bin (including the case ``num_procs <= 1``).

    Returns the tuple ``(out_raw, out_wght)``.
    """
    out_raw = []
    out_wght = []

    for ibin in range(0, num_bins):
        total = 0.
        sum_sq = 0.
        for proc in range(1, num_procs):
            raw = gmn_raw[proc][ibin]
            wght = gmn_wght[proc][ibin]
            total += raw * wght
            sum_sq += raw * (wght ** 2)
        out_raw.append((total ** 2) / sum_sq)
        out_wght.append(sum_sq / total)

    return (out_raw, out_wght)
135 
def GetData(lines, num_bins):
    """Return the observed yield per bin from the "observation" line.

    Any line containing the substring "observation" is used; tokens
    1..num_bins of that line are converted to float.  If several lines
    match, the last one wins.  Entries are None when no line matches.
    """
    data = [None] * num_bins
    for line in lines:
        if "observation" not in line:
            continue
        tokens = line.split()
        for ibin in range(0, num_bins):
            data[ibin] = float(tokens[ibin + 1])
    return data
144 
def GetCounts(gmn_raw, gmn_wght, num_procs, num_bins):
    """Return ``gmn_raw[proc][bin] * gmn_wght[proc][bin]`` for every cell.

    The result is a ``num_procs`` x ``num_bins`` list of lists.
    """
    counts = resize(num_procs, num_bins, 0.)
    for proc in range(0, num_procs):
        row = counts[proc]
        for ibin in range(0, num_bins):
            row[ibin] = gmn_raw[proc][ibin] * gmn_wght[proc][ibin]

    return counts
152 
def GetMeans(gmn_raw, gmn_wght, num_procs, num_bins):
    """Return ``(gmn_raw[proc][bin] + 1) * gmn_wght[proc][bin]`` for every cell.

    The result is a ``num_procs`` x ``num_bins`` list of lists.
    """
    means = resize(num_procs, num_bins, 0.)
    for proc in range(0, num_procs):
        row = means[proc]
        for ibin in range(0, num_bins):
            row[ibin] = (gmn_raw[proc][ibin] + 1.) * gmn_wght[proc][ibin]

    return means
160 
def GetCountFracUncert(gmn_raw, lnn_match, lnn_diff, lnn_mc, num_procs, num_bins):
    """Return the fractional uncertainty of every (process, bin) yield.

    Each cell is the square root of
    ``1/(gmn_raw + 1) + lnn_match**2 + lnn_diff**2 + lnn_mc**2``,
    where ``lnn_match`` and ``lnn_mc`` are per-bin and ``gmn_raw`` and
    ``lnn_diff`` are per-(process, bin).
    """
    uncerts = resize(num_procs, num_bins, 0.)
    for proc in range(0, num_procs):
        for ibin in range(0, num_bins):
            variance = (1. / (gmn_raw[proc][ibin] + 1.)
                        + lnn_match[ibin] ** 2
                        + lnn_diff[proc][ibin] ** 2
                        + lnn_mc[ibin] ** 2)
            uncerts[proc][ibin] = sqrt(variance)

    return uncerts
172 
def GetKappa(lines, num_procs, num_bins):
    """Return the per-bin kappa values stored on the "#kappa" line.

    A line is used when its first token is "#kappa" and it has at least
    ``num_bins*num_procs + 1`` tokens; the value for each bin is read from
    the token at index ``num_procs*bin + 2``.  If several lines match, the
    last one wins.  Entries are None when no line matches.
    """
    kappa = [None] * num_bins
    min_tokens = num_bins * num_procs + 1

    for line in lines:
        tokens = line.split()
        if len(tokens) < min_tokens or tokens[0] != "#kappa":
            continue
        for ibin in range(0, num_bins):
            kappa[ibin] = float(tokens[num_procs * ibin + 2])

    return kappa
181 
def GetKappaFracUncert(bkg_raw, lnn_mc, num_procs, num_bins):
    """Return the fractional uncertainty on kappa for every bin.

    With a single process (``num_procs == 1``) a list of zeros is
    returned.  Otherwise each entry is
    ``sqrt(1/(bkg_raw + 1) + lnn_mc**2)``.

    ``lnn_mc`` is squared before being added: the other uncertainty
    helpers in this file (GetCountFracUncert, GetPredictionFracUncert)
    treat it as a fractional uncertainty and add ``lnn_mc**2`` in
    quadrature, so the same is done here.
    """
    if num_procs == 1:
        return [0.] * num_bins

    return [sqrt(1. / (bkg_raw[ibin] + 1.) + lnn_mc[ibin] ** 2)
            for ibin in range(0, num_bins)]
190 
def GetPrediction(bkg_raw, bkg_wght, num_bins):
    """Return ``bkg_raw[bin] * bkg_wght[bin]`` for each of the ``num_bins`` bins."""
    return [bkg_raw[ibin] * bkg_wght[ibin] for ibin in range(0, num_bins)]
196 
def GetPredictionMean(bkg_raw, bkg_wght, num_bins):
    """Return ``(bkg_raw[bin] + 1) * bkg_wght[bin]`` for each of the ``num_bins`` bins."""
    return [(bkg_raw[ibin] + 1.) * bkg_wght[ibin] for ibin in range(0, num_bins)]
202 
def GetPredictionFracUncert(gmn_raw, gmn_wght, lnn_match, lnn_diff, lnn_mc, num_procs, num_bins):
    """Return the fractional uncertainty on the summed prediction per bin.

    For each bin the variance starts at ``lnn_match**2 + lnn_mc**2``.
    Over processes 1..num_procs-1 the following are accumulated:
      * ``count = sum(raw*wght)`` and ``ss = sum(raw*wght**2)``,
      * ``num = sum((raw+1)*wght**2 + ((raw+1)*wght*lnn_diff)**2)``.
    With ``den_raw = count**2/ss`` and ``den_wght = ss/count``, the term
    ``num / ((den_raw+1)*den_wght)**2`` is added and the square root of
    the total is returned.

    Process 0 is excluded.  A ZeroDivisionError is raised when ``ss`` or
    ``count`` is zero for a bin.
    """
    uncerts = []
    for ibin in range(0, num_bins):
        variance = lnn_match[ibin] ** 2 + lnn_mc[ibin] ** 2

        numer = 0.
        total = 0.
        sum_sq = 0.
        for proc in range(1, num_procs):
            raw = gmn_raw[proc][ibin]
            wght = gmn_wght[proc][ibin]
            mean = (raw + 1.) * wght
            total += raw * wght
            sum_sq += raw * (wght ** 2)
            numer += (raw + 1.) * (wght ** 2) + (mean * lnn_diff[proc][ibin]) ** 2

        # Effective raw count and weight of the combined processes.
        eff_raw = total ** 2 / sum_sq
        eff_wght = sum_sq / total

        variance += numer / (((eff_raw + 1.) * eff_wght) ** 2)
        uncerts.append(sqrt(variance))
    return uncerts
224 
# ---------------------------------------------------------------------------
# Script body: read a data card, derive yields/uncertainties with the helpers
# above, and write a standalone LaTeX table next to the input file
# (<input>_table.tex).
# ---------------------------------------------------------------------------
parser = optparse.OptionParser(
    description="Convert data card to human readable format",
    )

parser.add_option("-f","--file",
                  default = "txt/data_card_2015_05_25_method_0_with_mc_kappa_T1tttt_1500_100_lumi_10_no_tk_veto_ht_500_mt_0_140_mj_0_400_njets_7_9_met_200_400_nb_2_3.txt",
                  dest = "file",
                  help = "Data card file"
                  )

(options, args) = parser.parse_args()

in_file_name = options.file
# Read the whole card and release the handle immediately.  The (closed) file
# object is kept because the name helpers below only read its ``.name``.
with open(in_file_name, 'r') as file:
    lines = [line.rstrip('\n') for line in file]

# "imax" carries the number of bins; "jmax" carries the process count minus
# one (hence the +1).  The last all-digit token on the line is the one kept.
num_bins = 0
num_procs = 1
for line in lines:
    if line.find('imax') != -1:
        for word in line.split():
            if word.isdigit():
                num_bins = int(word)
    elif line.find('jmax') != -1:
        for word in line.split():
            if word.isdigit():
                num_procs = int(word)+1

bin_names = GetBinNames(file, lines, num_bins)
proc_names = GetProcessNames(file, lines, num_procs)

(lnn_match, lnn_diff, lnn_mc) = GetLnn(lines, num_procs, num_bins)
(gmn_raw, gmn_wght) = GetGamma(lines, num_procs, num_bins)
(bkg_raw, bkg_wght) = GetBkgGamma(gmn_raw, gmn_wght, num_procs, num_bins)

data = GetData(lines, num_bins)
count = GetCounts(gmn_raw, gmn_wght, num_procs, num_bins)
count_mean = GetMeans(gmn_raw, gmn_wght, num_procs, num_bins)
count_unc = GetCountFracUncert(gmn_raw, lnn_match, lnn_diff, lnn_mc, num_procs, num_bins)
kappa = GetKappa(lines, num_procs, num_bins)
kappa_unc = GetKappaFracUncert(bkg_raw, lnn_mc, num_procs, num_bins)
pred = GetPrediction(bkg_raw, bkg_wght, num_bins)
pred_mean = GetPredictionMean(bkg_raw, bkg_wght, num_bins)
pred_unc = GetPredictionFracUncert(gmn_raw, gmn_wght, lnn_match, lnn_diff, lnn_mc, num_procs, num_bins)

out_file_name = in_file_name + "_table.tex"
# The ``with`` block guarantees the table is flushed and the handle closed,
# even if formatting one of the rows raises.
with open(out_file_name, 'w') as f:
    f.write("\\documentclass{article}\n")
    f.write("\\begin{document}\n")
    f.write("\\begin{table}\n")
    f.write("\\centering\n")
    # NOTE(review): the column spec and header are fixed at seven columns
    # (three processes); cards with a different process count will produce a
    # mismatched table -- confirm whether that case matters.
    f.write("\\begin{tabular}{rrrrrrr}\n")
    f.write("\\hline\\hline\n")
    f.write("Bin & Data & Prediction & $\\kappa$ & $t\\overline{t}$ & Other & Signal\\\\\n")
    f.write("\\hline\n")

    for ibin in range(0, num_bins):
        cells = [
            bin_names[ibin],
            "%.2f" % data[ibin],
            "$%.2f\\pm%.2f$" % (pred[ibin], pred_unc[ibin]*pred_mean[ibin]),
            "$%.2f\\pm%.2f$" % (kappa[ibin], kappa_unc[ibin]*kappa[ibin]),
        ]
        # Processes 1..num_procs-1 come first; process 0 is the last column.
        for proc in list(range(1, num_procs)) + [0]:
            cells.append("$%.2f\\pm%.2f$" % (count[proc][ibin],
                                             count_unc[proc][ibin]*count_mean[proc][ibin]))
        f.write(" & ".join(cells) + " \\\\\n")

    f.write("\\hline\\hline\n")
    f.write("\\end{tabular}\n")
    f.write("\\end{table}\n")
    f.write("\\end{document}\n")
def GetLnn(lines, num_procs, num_bins)
Definition: parse_card.py:70
def GetPredictionMean(bkg_raw, bkg_wght, num_bins)
Definition: parse_card.py:197
def resize(na, nb, init)
Definition: parse_card.py:13
def GetData(lines, num_bins)
Definition: parse_card.py:136
def GetProcessNames(file, lines, num_procs)
Definition: parse_card.py:45
def GetBkgGamma(gmn_raw, gmn_wght, num_procs, num_bins)
Definition: parse_card.py:120
def GetBinNames(file, lines, num_bins)
Definition: parse_card.py:19
def GetCountFracUncert(gmn_raw, lnn_match, lnn_diff, lnn_mc, num_procs, num_bins)
Definition: parse_card.py:161
def GetCounts(gmn_raw, gmn_wght, num_procs, num_bins)
Definition: parse_card.py:145
def isfloat(value)
Definition: parse_card.py:6
def GetMeans(gmn_raw, gmn_wght, num_procs, num_bins)
Definition: parse_card.py:153
def GetGamma(lines, num_procs, num_bins)
Definition: parse_card.py:106
def GetPredictionFracUncert(gmn_raw, gmn_wght, lnn_match, lnn_diff, lnn_mc, num_procs, num_bins)
Definition: parse_card.py:203
def GetKappaFracUncert(bkg_raw, lnn_mc, num_procs, num_bins)
Definition: parse_card.py:182
def GetPrediction(bkg_raw, bkg_wght, num_bins)
Definition: parse_card.py:191
def GetKappa(lines, num_procs, num_bins)
Definition: parse_card.py:173