babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
validate_ntuples.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 ###### Script that compares the yields in old and new ntuples
4 from ROOT import TChain, TH1D
5 import os, sys, subprocess
6 import glob
7 import json
8 import string
9 import time
10 import math
11 from utilities import *
12 
class bcolors:
    """ANSI escape sequences used to colour and style terminal output.

    Concatenate one of the codes in front of a string and append ENDC
    afterwards to return the terminal to its default rendering.
    """

    # Foreground colours
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Resets every colour/attribute set above
    ENDC = '\033[0m'
22 
# Setting folders
#
# Exactly one old/new pair is active. The pairs from earlier comparisons used
# to be live assignments that were immediately overwritten by the last pair;
# they are kept here as comments so that the active pair is unambiguous.
# To rerun an earlier comparison, copy its pair over the active one below.
#
#   oldfolder = '/net/cms2/cms2r0/babymaker/babies/2016_01_11/mc/T1tttt/skim_abcd/'
#   newfolder = '/net/cms26/cms26r0/babymaker/2016_04_29/normalized/T1tttt/skim_abcd/'
#
#   oldfolder = '/net/cms2/cms2r0/babymaker/babies/2016_01_11/mc/T1tttt/'
#   newfolder = '/net/cms26/cms26r0/babymaker/2016_04_29/normalized/T1tttt/'
#
#   newfolder = '/net/cms27/cms27r0/babymaker/2016_04_29/mc/skim_met100nb2nj4nl0'
#   oldfolder = '/net/cms27/cms27r0/babymaker/2016_04_29/mc/merged_met100nb2nj4nl0'
#
#   oldfolder = '/net/cms2/cms2r0/babymaker/babies/2015_11_28/mc/skim_1lht500met200'
#   newfolder = '/net/cms27/cms27r0/babymaker/2016_04_29/mc/merged_1lht500met200'

# Active comparison
oldfolder = '/net/cms2/cms2r0/babymaker/babies/2016_04_29/mc/unskimmed/'
newfolder = '/net/cms29/cms29r0/babymaker/babies/2016_08_10/mc/unskimmed/'

# Weight expressions handed to TChain.Draw as the selection/weight for the
# old and the new ntuples respectively.
oweight = "weight/w_toppt/eff_trig"
nweight = "weight/w_isr/w_pu"
41 
## Finding tags for each dataset
sortedfiles = findBaseSampleNames(newfolder)

# Single-argument print(...) is used throughout: it produces the same output
# under the Python 2 print statement and the Python 3 print function.
print('\nOLD FOLDER: '+oldfolder)
print('NEW FOLDER: '+newfolder)
# The banner used to omit the closing quote after the new weight.
print('OLD WEIGHT "'+oweight+'" - NEW WEIGHT "'+nweight+'"')

## Table header
print('\n{:>40}'.format(' Ntuple name ')+'{:>16}'.format('Difference')+'{:>17}'.format('Old yield')
      +' '+'{:>17}'.format('New yield')+'{:>17}'.format('Old entries')+'{:>17}'.format('New entries'))
print('=' * 128)

not_in_old = list()   # sample tags with no matching file in oldfolder
not_in_new = list()   # sample tags with no matching file in newfolder
rows = list()         # [tag, |diff|, old yield, new yield, old entries, new entries]
line = 1              # counts printed rows; a blank line is emitted every 5
histo = TH1D("histo","",10,0,10)
for ifile in sortedfiles:
    # TChain.Add returns the number of files matched by the wildcard
    ochain = TChain("tree")
    oldntuples = oldfolder+"/*"+ifile+'*root'
    no = ochain.Add(oldntuples)
    if no == 0:
        not_in_old.append(ifile)
        continue
    nchain = TChain("tree")
    newntuples = newfolder+"/*"+ifile+'*root'
    nn = nchain.Add(newntuples)
    if nn == 0:
        not_in_new.append(ifile)
        continue

    # The weighted yield is the integral of a one-bin-wide dummy plot.
    # histo is shared by all Draw calls, so it is reset first: this guarantees
    # the integral read back is never left over from a previous Draw.
    histo.Reset()
    no = ochain.Draw("1>>histo",oweight,"goff")
    oldtot = histo.Integral()
    histo.Reset()
    nn = nchain.Draw("1>>histo",nweight,"goff")
    newtot = histo.Integral()

    ## Relative difference in percent (999 flags "old is empty, new is not")
    if oldtot != 0:
        diff = (newtot-oldtot)*100/oldtot
    elif newtot == 0:
        diff = 0
    else:
        diff = 999

    pretag = ""
    posttag = ""
    ## Appending rows with significant differences for later printing
    # The threshold is 150/sqrt(N+1) percent for each of the two entry counts.
    # Draw returns -1 on error, which used to make N+1 zero and raise
    # ZeroDivisionError; the count is clamped at 0 for the threshold only, the
    # raw value is still printed so the failure stays visible.
    old_threshold = 150/math.sqrt(max(no, 0)+1)
    new_threshold = 150/math.sqrt(max(nn, 0)+1)
    if abs(diff) > old_threshold and abs(diff) > new_threshold:
        rows.append([ifile, abs(diff), oldtot, newtot, no, nn])
        pretag = bcolors.FAIL
        posttag = bcolors.ENDC
    ## Printing all rows
    print(pretag+'{:>40}'.format(ifile)+'{:>14.2f}'.format(diff)+' %'+'{:>17.2f}'.format(oldtot)
          +' '+'{:>17.2f}'.format(newtot)+'{:>17}'.format(no)+'{:>17}'.format(nn)+posttag)
    if line == 5:
        print('')
        line = 0
    line += 1
94 
## Summary table: samples flagged as significantly different,
## listed by increasing absolute difference
if rows:
    print(bcolors.FAIL + "\nSamples off by more than 1.5 sigma"+ bcolors.ENDC)
    print('\n' + '{:>40}{:>16}{:>17} {:>17}{:>17}{:>17}'.format(
        ' Ntuple name ', 'Difference', 'Old yield',
        'New yield', 'Old entries', 'New entries'))
    print('=' * 128)

# Element 1 of each row is the absolute percentage difference
rows.sort(key=lambda entry: entry[1])
for line, row in enumerate(rows, 1):
    ifile, diff, oldtot, newtot, no, nn = row

    ## Printing rows with significant differences
    print('{:>40}{:>14.2f} %{:>17.2f} {:>17.2f}{:>17}{:>17}'.format(
        ifile, diff, oldtot, newtot, no, nn))
    # Blank separator after every fifth row
    if line % 5 == 0:
        print('')
118 
## Reporting the sample tags that had no matching files in one of the
## folders, old folder first, then exiting with status 0
for folder, missing in ((oldfolder, not_in_old), (newfolder, not_in_new)):
    if not missing:
        continue
    print(bcolors.BOLD + '\nNtuples not found in '+folder+':'+ bcolors.ENDC)
    for ntu in missing:
        print('\t'+ntu)

print('')
sys.exit(0)
def findBaseSampleNames(folder)
Finding basename for each dataset.
Definition: utilities.py:12