babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
skim_ntuples.cxx
Go to the documentation of this file.
1 // skim_ntuples.cxx: Skims reduced trees
2 // USAGE: ./plot/skim_ntuples.exe infolder outfolder [cuts=\"ht>500&&met>200\"] [njobs=-1] [ijob=-1]
3 
4 
5 #include "utilities.hh"
6 
7 #include <ctime>
8 #include <fstream>
9 #include <iostream>
10 #include <cmath>
11 #include <string>
12 #include <sstream>
13 #include <vector>
14 #include <stdlib.h> /* atoi */
15 
16 #include "TChain.h"
17 #include "TFile.h"
18 #include "TString.h"
19 #include "TMath.h"
20 #include "TSystem.h"
21 #include "TDirectory.h"
22 
23 using namespace std;
24 using std::cout;
25 using std::endl;
26 
27 void onefile_skim(TString infiles, TString outfolder, TString cuts, TString tag);
28 
29 int main(int argc, char *argv[]){
30  time_t startTime;
31  time(&startTime);
32 
33  if(argc < 3) {
34  cout<<endl<<"Required at least 2 arguments: "
35  <<"./plot/skim_ntuples.exe infolder outfolder [cuts=\"nleps==1&&st>500&&met>200&&njets>=6&&nbdm>=1&&mj14>250&&nveto==0\"] "
36  <<"[njobs=0] [ijob=0] [filetag=\"\"]"<<endl<<endl;;
37  return 1;
38  }
39  TString folder(argv[1]), outfolder(argv[2]), cuts="nleps==1&&st>500&&met>200&&njets>=6&&nbdm>=1&&mj14>250&&nveto==0";
40  if(argc >= 4) cuts = argv[3];
41  unsigned njobs(0), ijob(0);
42  if(argc >= 6) {
43  njobs = atoi(argv[4]);
44  ijob = atoi(argv[5]);
45  }
46  TString filetag="";
47  if(argc >= 7) filetag = argv[6];
48 
49  TString tag = cuts; // Using tag to avoid file names too long for TFile
50  if(cuts=="standard") cuts="nleps>=1&&st>500&&met>100";
51  if(cuts=="nl1met200ht500") cuts="nleps>=1&&ht>500&&met>200";
52  if(cuts=="stdnj5") cuts="nleps>=1 && st>500 && met>100 && njets>=5";
53  if(cuts=="abcd") cuts="nleps==1&&st>500&&met>200&&njets>=6&&nbdm>=1&&mj14>250&&nveto==0";
54  if(cuts=="baseline") cuts="nleps==1&&st>500&&met>200&&njets>=6&&nbdm>=1";
55  if(cuts=="sys_abcd")
56  cuts = "nleps==1&&max(st,Max$(sys_st))>500&&max(met,Max$(sys_met))>200&&max(njets,Max$(sys_njets))>=6&&max(nbdm,Max$(sys_nbdm))>=1&&max(mj14,Max$(sys_mj14))>250";
57  if(cuts=="zcand")
58  cuts = "nleps==2&&Max$(leps_pt)>40&&((elel_m>80&&elel_m<100)||(mumu_m>80&&mumu_m<100))";
59  if(cuts=="dy_ht300")
60  cuts = "nvleps==2&&nleps>=1&&Max$(leps_pt)>30&&((elelv_m>80&&elelv_m<100)||(mumuv_m>80&&mumuv_m<100))&&ht>300";
61  if(cuts=="ttisr")
62  cuts = "nleps==2&&Max$(leps_pt)>40&&nbdm==2";
63  if(cuts=="wisr")
64  cuts = "met>100&&Max$(leps_pt)>40&&nbdl==0";
65  if(cuts=="wisrht200")
66  cuts = "ht>200&&met>100&&Max$(leps_pt)>40&&nbdl==0";
67  if(cuts=="qcd")
68  cuts = "ht>1000&&met<50&&(nvmus+nvels)==0";
69  if(cuts=="qcd_njet10")
70  cuts = "ht>1000&&met<50&&(nvmus+nvels)==0&&njets>=10";
71  if(cuts=="mm_std")
72  cuts="Sum$(mm_nleps>=1&&mm_st>500.&&mm_met>200.)>0";
73  if(cuts=="mm_std_nj5mj250")
74  cuts="Sum$(mm_nleps>=1&&mm_st>500&&mm_met>200&&mm_njets>=5&&mm_mj14_lep>250)>0||Sum$(mm_nleps>=1&&mm_st>500&&mm_met>200&&mm_njets>=5&&mm_mj14_nolep>250)>0";
75  if(cuts=="rpvfit")
76  cuts = "max(st,Max$(sys_st))>1200&&max(njets,Max$(sys_njets))>=4&&max(nbdm,Max$(sys_nbdm))>=1&&(max(mj12,Max$(sys_mj12))>500)";
77  if(cuts=="rpvregion")
78  cuts = "((nleps==0&&ht>1500)||(nleps==1&&ht>1200))&&njets>=4&&mj12>=300&&(nbdm>=1||nbdm>=1)";
79  if(cuts=="st1000")
80  cuts = "max(st,Max$(sys_st))>1000";
81  if(cuts=="llm60nj2")
82  cuts = "(mumuv_m>60||elelv_m>60)&&njets>=2";
83  if(cuts=="httrig") // Prescaled HT triggers for fake MET trigger efficiency measurement
84  cuts = "pass&&nvleps==0&&(trig[11]||trig[12]||trig[47]||trig[48]||trig[49]||trig[50]||trig[51]||trig[52]||trig[53]||trig[54])";
85  if(cuts=="httrig_w_leps") // Prescaled HT triggers
86  cuts = "pass&&(trig[11]||trig[12]||trig[47]||trig[48]||trig[49]||trig[50]||trig[51]||trig[52]||trig[53]||trig[54])";
87 
89  TString njcut = "njets>=4&&njets<=5";
90  TString sys_njcut = "(njets==4||sys_njets[1]==4||sys_njets[2]==4||njets==5||sys_njets[1]==5||sys_njets[2]==5)";
91  TString nbcut = "&&(nbt>=2||nbdt>=2)";
92  TString sys_nbcut = "&&max(nbdt,Max$(sys_nbdt))>=2";
93  TString zcand = "&&(mumu_m*(mumu_m>0)+elel_m*(elel_m>0))>80&&(mumu_m*(mumu_m>0)+elel_m*(elel_m>0))<100";
94  TString higtrim = "&&higd_drmax<2.2&&higd_dm<=40&&higd_am<=200";
95  TString sys_higtrim = "&&min(higd_drmax,Min$(sys_higd_drmax))<2.2&&min(higd_dm,Min$(sys_higd_dm))<=40&&min(higd_am,Min$(sys_higd_am))<=200";
96  if(cuts=="higqcd") cuts = njcut +"&& met>150 && nvleps==0";
97  if(cuts=="higloose") cuts = njcut+nbcut+"&& met>150 && nvleps==0";
98  if(cuts=="higtight") cuts = njcut+nbcut+"&& met>150 && nvleps==0 && ntks==0&&!low_dphi"+higtrim;
99  if(cuts=="higsys") cuts = sys_njcut+sys_nbcut+"&& max(met,Max$(sys_met))>150 && nvleps==0 && ntks==0&&!low_dphi"+sys_higtrim;
100  if(cuts=="higlep1") cuts = njcut+nbcut+"&& nleps==1 && Max$(leps_pt)>30";
101  if(cuts=="higlep2") cuts = njcut+zcand+"&& nleps==2 && Max$(leps_pt)>40";
102 
104  if(cuts=="nl2nj3nbdm2zveto")
105  cuts = "nleps>=2&&(elel_m<80||elel_m>100)&&(mumu_m<80||mumu_m>100)&&njets>=3&&nbdm>=2";
106 
107  if(cuts.Contains("mchi")){
108  cuts.ReplaceAll("mchi","");
109  cuts = "Sum$(mc_id==1000023&&mc_mass=="+cuts+")>0";
110  }
111 
113  TString pass="globalTightHalo2016Filter==1&&HBHENoiseFilter==1&&HBHEIsoNoiseFilter==1&&eeBadScFilter==1";
114  pass += "&&EcalDeadCellTriggerPrimitiveFilter==1&&BadChargedCandidateFilter&&BadPFMuonFilter&&NVtx>0&&JetID";
115  if(cuts=="ra2_qcd") cuts = pass+"&&(@Electrons.size()+@Muons.size())==0&&NJets>=3";
116  if(cuts=="ra2_ht300") cuts = pass+"&&HT>300";
117  if(cuts=="ra2_eht300") cuts = pass+"&&Max$(Electrons.Pt()*(abs(Electrons.Eta())<2))>35&&HT>300";
118  if(cuts=="ra2_zmht200") cuts = pass+"&&@ZCandidates.size()>=1&&MHT>200";
119 
120 
121  vector<TString> files = dirlist(folder, "*"+filetag+"*.root");
122  unsigned nfiles(files.size()), ini(0), end(nfiles);
123  if(njobs>0){
124  if(ijob<1 || ijob>njobs){
125  cout<<endl<<"You need to set the 5th argument between 1 and "<<njobs<<endl<<endl;
126  return 1;
127  }
128  unsigned jobfiles = (nfiles+njobs-1)/njobs;
129  unsigned nbigjobs = (nfiles+njobs-1)%njobs+1;
130  if(ijob <= nbigjobs){
131  ini = jobfiles*(ijob-1);
132  end = ini + jobfiles;
133  } else {
134  ini = nbigjobs*jobfiles+(jobfiles-1)*(ijob-1-nbigjobs);
135  end = ini + jobfiles-1;
136  }
137  }
138  cout<<"Doing files "<<ini+1<<" to "<<end<<" out of "<<nfiles<<endl;
139  for(unsigned file(ini); file < end; file++){
140  cout<<file+1<<"/"<<nfiles<<": ";
141  onefile_skim(folder+"/"+files[file], outfolder, cuts, tag);
142  }
143 
144  time_t curTime;
145  time(&curTime);
146  int seconds = difftime(curTime,startTime);
147  cout<<endl<<"Took "<< seconds << " seconds ("<<hoursMinSec(seconds)<<") to skim "<< end-ini<<" files."<<endl<<endl;
148 }
149 
150 void onefile_skim(TString infiles, TString outfolder, TString cuts, TString tag){
151  TString folder(infiles), outfile(infiles);
152  folder.Remove(folder.Last('/')+1, folder.Length());
153 
154  // Finding outfile name
155  outfile.Remove(0, outfile.Last('/')); outfile.ReplaceAll("*","");
156  if(outfile.Contains(".root")) outfile.ReplaceAll(".root","_"+tag+".root");
157  else outfile += ("_"+tag+".root");
158  outfile.ReplaceAll(">=","GE"); outfile.ReplaceAll("<=","SE"); outfile.ReplaceAll("&","_");
159  outfile.ReplaceAll(">","G"); outfile.ReplaceAll("<","S"); outfile.ReplaceAll("=","");
160  outfile.ReplaceAll("(",""); outfile.ReplaceAll(")",""); outfile.ReplaceAll("+","");
161  outfile.ReplaceAll("[",""); outfile.ReplaceAll("]",""); outfile.ReplaceAll("|","_");
162  outfile.ReplaceAll("$",""); outfile.ReplaceAll(",","_"); outfile.ReplaceAll("!","NOT");
163  outfile.ReplaceAll(" ",""); outfile.ReplaceAll("@","");
164  outfile = outfolder+outfile;
165 
166  // Checking if output file exists
167  TString outname(outfile);
168  outname.ReplaceAll(outfolder, ""); outname.ReplaceAll("/", "");
169  vector<TString> outfiles = dirlist(outfolder, outname);
170  if(outfiles.size()>0) {
171  cout<<"File "<<outfile<<" exists. Exiting"<<endl;
172  return;
173  }
174 
175  gSystem->mkdir(outfolder, kTRUE);
176  TFile out_rootfile(outfile, "CREATE");
177  if(out_rootfile.IsZombie() || !out_rootfile.IsOpen()) return;
178  out_rootfile.cd();
179  TChain tree("tree");
180  int nfiles = tree.Add(infiles);
181  TChain treeglobal("treeglobal");
182  treeglobal.Add(infiles);
183 
184  //cout<<"Skimming the "<<nfiles<<" files in "<<infiles<<endl;
185  long nentries(tree.GetEntries());
186  TTree *ctree = tree.CopyTree(cuts);
187  long centries=ctree->GetEntries();
188  if(ctree) ctree->Write();
189  else {
190  cout<<"Could not find tree in "<<infiles<<endl;
191  return;
192  }
193 
195  TFile infile(infiles);
196  if(infile.Get("treeglobal") != 0){
197  out_rootfile.cd();
198  TTree *ctreeglobal = treeglobal.CopyTree("1");
199  if(ctreeglobal) ctreeglobal->Write();
200  //else cout<<"Could not find treeglobal in "<<infiles<<endl;
201  }
202  out_rootfile.Close();
203  cout<<"Written "<<centries<<" entries to "<<outfile<<" from "<<nfiles<<" files and "<<nentries<<" entries."<<endl;
204 }
205 
tuple infiles
Finding tags for each dataset.
TString hoursMinSec(long seconds)
Definition: utilities.cc:599
void onefile_skim(TString infiles, TString outfolder, TString cuts, TString tag)
STL namespace.
int main(int argc, char *argv[])
std::vector< TString > dirlist(const TString &folder, const TString &inname="dir", const TString &tag="")
Definition: utilities.cc:287