susy_cfa  b611ccad937ea179f86a1f5663960264616c0a20
make_tree.cxx
Go to the documentation of this file.
1 // make_tree: Generates the reduced trees
2 
3 #include <ctime>
4 
5 #include <vector>
6 #include <iostream>
7 #include <fstream>
8 #include <string>
9 #include <unistd.h>
10 #include <string>
11 
12 #include "TString.h"
13 #include "TChain.h"
14 
15 #include "utilities.hpp"
16 #include "event_handler.hpp"
17 #include "small_tree.hpp"
18 
19 using namespace std;
20 
// Forward declaration; the definition follows main(). Reads the dataset list in inFilename and fills
// files, yes_trig, no_trig and outname for batch number nbatch, using nfiles files per batch.
21 void ParseDatasets(TString inFilename, int nfiles, int nbatch, vector<TString> &yes_trig, vector<TString> &no_trig,
22  vector<TString> &files, TString &outname);
23 
24 int main(int argc, char *argv[]){
25  time_t startTime, curTime;
26  time(&startTime);
27 
28  std::string inFilename("");
29  std::string masspoint("");
30  int c(0), Nentries(-1), nfiles(-1), nbatch(-1), total_entries_override(-1);
31  string type = "quick";
32  while((c=getopt(argc, argv, "n:t:i:m:f:b:s:"))!=-1){
33  switch(c){
34  case 'n':
35  Nentries=atoi(optarg);
36  break;
37  case 't':
38  total_entries_override = atoi(optarg);
39  break;
40  case 'f':
41  nfiles=atoi(optarg);
42  break;
43  case 'b':
44  nbatch=atoi(optarg);
45  break;
46  case 'i':
47  inFilename=optarg;
48  break;
49  case 'm':
50  masspoint=optarg;
51  break;
52  case 's':
53  type = optarg;
54  break;
55  default:
56  break;
57  }
58  }
59 
60  TString inFilename_s(inFilename);
61  int len(inFilename_s.Length());
62  if(inFilename_s[len-1] == '/') inFilename_s.Remove(len-1, len);
63  inFilename = inFilename_s;
64  TString outFilename(inFilename_s), folder(inFilename_s);
65  TString outfolder("out/");
66  TString prefix = "small_"+type+"_";
67 
68  vector<TString> files, yes_trig, no_trig;
69  int ini(nfiles*(nbatch-1)), end(nfiles*nbatch), ntotfiles(-1), Ntotentries(-1);
70 
71  //outFilename.ReplaceAll("/cfA",""); // line needed when running directly on the output of CfANtupler
72  // which produces files named cfA_XX.root
73 
74  outFilename.Remove(0,outFilename.Last('/')+1);
75  enum Mode{dir_full, dir_part, one_file, txt_part, unknown};
76  Mode mode = unknown;
77  std::string all_sample_files = inFilename + "/*.root";
78  if(Contains(inFilename, ".txt")){
79  if(nfiles < 0 ) cout<<"Txt file input method in one go not implemented yet. Try with -f -b"<<endl;
80  else {
81  mode = txt_part;
82  TString outname;
83  ParseDatasets(inFilename, nfiles, nbatch, yes_trig, no_trig, files, outname);
84  if(files.size()==0){
85  cout<<"No files for file "<<inFilename<<". Exiting"<<endl<<endl;
86  return 1;
87  }
88  cout<<"Sending job with "<<files.size()<<" files. First is "<<files[0]<<" with "<<files.size()<<" files, "
89  <<yes_trig.size()<<" yes_trig, and "<<no_trig.size()<<" no_trig"<<endl;
90  for(unsigned ind(0); ind<yes_trig.size(); ind++)
91  cout<<"Yes: "<<yes_trig[ind]<<endl;
92  for(unsigned ind(0); ind<no_trig.size(); ind++)
93  cout<<"No: "<<no_trig[ind]<<endl;
94 
95  inFilename = files[0];
96  outFilename = outfolder+prefix+outname+"_files"; outFilename += nfiles;
97  outFilename += "_batch"; outFilename += nbatch; outFilename += ".root";
98  }
99  } else if(!Contains(inFilename, ".root")){
100  if(nfiles>0){ // Doing sample in various parts
101  mode = dir_part;
102  files = dirlist(inFilename, ".root");
103  ntotfiles = static_cast<int>(files.size());
104  if(ini > ntotfiles) {
105  cout<<"Trying to start at file "<<ini<<" but there are only "<<ntotfiles<<". Exiting."<<endl;
106  return 1;
107  }
108  inFilename = folder + "/" + files[ini];
109  outFilename = outfolder+prefix+outFilename+"_files"; outFilename += nfiles;
110  outFilename += "_batch"; outFilename += nbatch; outFilename += ".root";
111 
112  if(end > ntotfiles) end = ntotfiles;
113  }else{
114  mode = dir_full;
115  inFilename = inFilename + "/*.root";
116  outFilename = outfolder+prefix+outFilename+".root";
117  }
118  } else {
119  mode = one_file;
120  outFilename = outfolder+prefix+outFilename;
121  nfiles = -1;
122  }
123 
124  cout<<"Opening "<<inFilename<<endl;
125 
126  TChain chain("cfA/eventB");
127  switch(mode){
128  case dir_part:
129  chain.Add(all_sample_files.c_str());
130  break;
131  case txt_part: // It doesn't matter
132  chain.Add(files[0]);
133  break;
134  case dir_full:
135  case one_file:
136  case unknown:
137  default:
138  chain.Add(inFilename.c_str());
139  break;
140  }
141 
142  event_handler tHandler(inFilename, type);
143  tHandler.ehb->yes_trig = yes_trig; tHandler.ehb->no_trig = no_trig;
144  if(mode==dir_part){
145  cout<<endl<<"Doing files "<<ini+1<<" to "<<end<<" from a total of "<<ntotfiles<<" files."<<endl;
146  for(int ifile(ini+1); ifile < end; ifile++){
147  tHandler.AddFiles((folder + "/" + files[ifile]).Data());
148  }
149  }
150  if(mode==txt_part){
151  for(unsigned ifile(1); ifile < files.size(); ifile++){
152  tHandler.AddFiles(files[ifile].Data());
153  }
154  }
155  if(Nentries > tHandler.TotalEntries() || Nentries < 0) Nentries = tHandler.TotalEntries();
156  if((mode==one_file || mode==dir_full) && Nentries != tHandler.TotalEntries()){
157  Ntotentries = Nentries;
158  }else{
159  Ntotentries = chain.GetEntries("weight>0")-chain.GetEntries("weight<0");
160  }
161  if(total_entries_override > 0) Ntotentries = total_entries_override;
162  time(&curTime);
163  cout<<"Getting started takes "<<difftime(curTime,startTime)<<" seconds. "
164  <<"Making reduced tree with "<<Nentries<<" entries out of "<<tHandler.TotalEntries()
165  <<". "<<Ntotentries<<" entries in the full sample."<<endl;
166  tHandler.ReduceTree(Nentries, outFilename, Ntotentries);
167 
168  cout<<"Wrote "<<outFilename<<endl<<endl;
169  time(&curTime);
170  cout<<Nentries<<" events took "<<difftime(curTime,startTime)<<" seconds"<<endl<<endl;
171 
172  return 0;
173 }
174 
175 
176 void ParseDatasets(TString inFilename, int nfiles, int nbatch, vector<TString> &yes_trig, vector<TString> &no_trig,
177  vector<TString> &files, TString &outname){
178 
179  ifstream file(inFilename);
180  TString dataset, filename;
181  int ifile(1);
182  yes_trig.clear(); no_trig.clear();
183  files.clear();
184  file >> dataset;
185  outname = "Run2015B_";
186  while(file){
187  if(!dataset.Contains("MINIAOD")) continue;
188  TString name(dataset);
189  int len(name.Length());
190  if(name[len-2] == '/') name.Remove(len-2, len-1);
191  name.Remove(0,name.Last('/')+1);
192  name.Remove(name.First('_'), len);
193  outname += ("_"+name);
194  vector<TString> setfiles = dirlist(dataset, ".root");
195 
196  // For some reason, files can't be read some times
197  if(setfiles.size()==0) {
198  int maxretry(5);
199  for(int ind(0); ind<maxretry; ind++){
200  cout<<"No root files found at "<<dataset<<". Retrying "<<ind+1<<"/"<<maxretry<<endl;
201  setfiles = dirlist(dataset, ".root");
202  if(setfiles.size()>0) break;
203  }
204  if(setfiles.size()==0) {
205  cout<<"Giving up. No root files found at "<<dataset<<endl;
206  return;
207  }
208  }
209  int nfilesdir(static_cast<int>(setfiles.size())), ibatch(-1);
210  int njobs(nfilesdir/nfiles+1);
211  bool sendjob(nbatch>=ifile && nbatch<(ifile+njobs));
212  if(sendjob) ibatch = nbatch - ifile;
213  int ini(nfiles*ibatch), end(nfiles*(ibatch+1));
214  cout<<"Adding "<<dataset<<". nbatch "<<nbatch<<", ifile "<<ifile<<", njobs "<<njobs<<", sendjob "<<sendjob
215  <<", nfilesdir "<<nfilesdir<<", ini "<<ini<<", end "<<end;
216 
217  for(int fil(0); fil < nfilesdir; fil++){
218  filename = dataset+"/"+setfiles[fil];
219  if(fil>=ini && fil<end && sendjob) files.push_back(filename);
220  }
221  ifile += njobs;
222  file >> dataset;
223  while(!dataset.Contains("MINIAOD") && file){
224  if(dataset.Contains("_v")){
225  if(sendjob) yes_trig.push_back(dataset);
226  else if(files.size()==0) no_trig.push_back(dataset);
227  }
228  file >> dataset;
229  } // Looping over the input file for triggers
230  cout<<". It has "<<yes_trig.size()<<" yes_trig and "<<no_trig.size()<<" no_trig"<<endl;
231  } // Looping over the input file for datasets
232  if(files.size()==0){
233  cout<<"No files for file "<<inFilename<<" and batch job "<<nbatch<<". Max batch number "<<ifile-1<<endl;
234  }
235 }
236 
string files
Definition: data_combine.py:33
std::vector< TString > yes_trig
string outfolder
Definition: data_combine.py:17
long TotalEntries() const
bool Contains(const std::string &text, const std::string &pattern)
void ParseDatasets(TString inFilename, int nfiles, int nbatch, vector< TString > &yes_trig, vector< TString > &no_trig, vector< TString > &files, TString &outname)
Definition: make_tree.cxx:176
STL namespace.
int main(int argc, char *argv[])
Definition: make_tree.cxx:24
std::vector< TString > no_trig
event_handler_base * ehb
void AddFiles(const std::string &file)
void ReduceTree(int num_entries, const TString &out_file_name, int num_total_entries)
string filename
Definition: data_combine.py:22
std::vector< TString > dirlist(const TString &folder, const TString &inname="dir", const TString &tag="")
Definition: utilities.cpp:182