babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
combine_datasets.cxx
Go to the documentation of this file.
1 // combine_datasets: Finds all unique events in a list of datasets
2 
3 #include <ctime>
4 
5 #include <vector>
6 #include <fstream>
7 #include <iostream>
8 #include <set>
9 #include <map>
10 #include <unistd.h> // getopt
11 #include <iomanip> // setw
12 
13 #include "TString.h"
14 #include "TChain.h"
15 #include "TTree.h"
16 #include "TFile.h"
17 #include "TSystem.h"
18 
19 #include "utilities.hh"
20 
21 using namespace std;
22 
23 int main(int argc, char *argv[]){
24  time_t startTime, curTime;
25  time(&startTime);
26 
27  TString file_datasets("txt/singlelep.txt"), infolder(""), outfolder("out/");
28  int begrun(-1), endrun(-1);
29  int c(0);
30  while((c=getopt(argc, argv, "f:i:o:b:e:"))!=-1){
31  switch(c){
32  case 'i':
33  infolder=optarg;
34  break;
35  case 'b':
36  begrun=atoi(optarg);
37  break;
38  case 'e':
39  endrun=atoi(optarg);
40  break;
41  case 'o':
42  outfolder=optarg;
43  break;
44  case 'f':
45  file_datasets=optarg;
46  break;
47  default:
48  break;
49  }
50  }
51  if(file_datasets=="" || infolder==""){
52  cout<<endl<<"Specify input folder and datasets: "
53  <<"./run/combine_datasets.exe -i <infolder> -o <outfolder=out> -f <file_datasets=txt/singlelep.txt> -b <begrun=-1> -e <endrun=-1>"<<endl<<endl;
54  return 1;
55  }
56 
57  TString run_s="_runs"; run_s += begrun;
58  if(endrun>begrun){
59  run_s += "-"; run_s += endrun;
60  }
61  if(begrun>0){
62  if(endrun<begrun){
63  cout<<"You set begrun to "<<begrun<<", and endrun to "<<endrun
64  <<", but endrun has to be >= to begrun. Exiting"<<endl<<endl;
65  return 1;
66  }
67  cout<<"Combining "<<run_s<<" of ntuples in "<<infolder<<endl;
68  }
69 
70  vector<TString> datasets;
71  TString buffer, basename("Run2016");
72  ifstream indata(file_datasets);
73  while(indata){
74  indata >> buffer;
75  if(buffer!=""){
76  datasets.push_back(buffer);
77  basename += ("_"+buffer);
78  }
79  }
80  if(begrun>0) basename += run_s;
81 
82  map<int, map<int, set<Long64_t> > > runs;
83  Long64_t event;
84  int run, lumiblock;
85 
86  for(unsigned idata(0); idata < datasets.size(); idata++){
87  TChain chain("tree");
88  TString filename(infolder+"/*"+datasets[idata]+"*.root");
89  int files = chain.Add(filename);
90  if(files<1) {
91  cout<<"No files found for "<<filename<<endl;
92  continue;
93  }
94  gSystem->mkdir(outfolder, kTRUE);
95  TString outname(outfolder+"/baby_");
96  outname += idata;
97  outname += "_"+basename;
98  outname += ".root";
99  TFile outfile(outname, "RECREATE");
100  outfile.cd();
101 
102  TTree *outtree(chain.CloneTree(0));
103 
104  // TBranch *b_event = chain.Branch("event", &event);
105  // TBranch *b_run = chain.Branch("run", &run);
106  TBranch *b_event(nullptr), *b_lumiblock(nullptr), *b_run(nullptr);
107  chain.SetBranchAddress("event", &event, &b_event);
108  chain.SetBranchAddress("lumiblock", &lumiblock, &b_lumiblock);
109  chain.SetBranchAddress("run", &run, &b_run);
110 
111  long entries(chain.GetEntries()), tree_entry;
112 
113  cout<<endl<<"Doing "<<files<<" files in "<<filename<<" with "<<entries<<" entries"<<endl;
114  time(&startTime);
115  for(int entry(0); entry<entries; entry++){
116  if(entry!=0 && entry%250000==0) {
117  time(&curTime);
118  int seconds(difftime(curTime,startTime));
119 
120  cout<<"Doing entry "<<setw(10)<<addCommas(entry)<<" of "<<addCommas(entries)
121  <<" Took "<<setw(6)<<seconds<<" seconds at "
122  <<setw(4)<<roundNumber(entry,1,seconds*1000.)<<" kHz"<<endl;
123  }
124 
125  // Load "run" first, and check if it's in the range we care about
126  tree_entry = chain.LoadTree(entry);
127  b_run->GetEntry(tree_entry);
128  if(begrun>0 && (run<begrun || run>endrun)) continue;
129  b_lumiblock->GetEntry(tree_entry);
130  b_event->GetEntry(tree_entry);
131 
132  if(runs.find(run) == runs.end()) runs.emplace(run, map<int, set<Long64_t> >{}); // New run
133  auto &lumiblocks = runs.at(run);
134  if(lumiblocks.find(lumiblock) == lumiblocks.end()) lumiblocks.emplace(lumiblock, set<Long64_t>{}); // New lumiblock
135  auto &events = lumiblocks.at(lumiblock);
136  if(events.find(event) == events.end()){ // New event
137  events.emplace(event);
138  // You need to load all branches to copy them into outtree
139  chain.GetEntry(entry);
140  outtree->Fill();
141  }
142  } // Loop over entries
143  outtree->Write();
144  outfile.Write();
145  outfile.Close();
146  time(&curTime);
147  cout<<"Took "<<difftime(curTime,startTime) <<" seconds to write "<<outname<<endl;
148 
149  } // Loop over datasets
150 
151  // for(auto it = events.cbegin(); it != events.cend(); ++it) {
152  // cout << it->first <<", ";
153  // } // Needs c++11
154 
155  if(false){
156  TString txtname(outfolder+"/runs_"+basename+".txt");
157  ofstream txtfile(txtname);
158  int prevrun(0);
159  for(map<int, map<int, set<Long64_t> > >::const_iterator it = runs.begin(); it != runs.end(); ++it) {
160  run = it->first;
161  if(run/1000 != prevrun){
162  prevrun = run/1000;
163  txtfile<<endl;
164  }
165  txtfile << run << " ";
166  }
167  txtfile<<endl;
168  txtfile.close();
169  cout<<endl<<"Written run numbers in "<<txtname<<endl;
170  }
171  cout<<endl<<endl;
172 
173  return 0;
174 }
STL namespace.
tuple run
Parsing run from file name.
int main(int argc, char *argv[])
TString addCommas(double num)
Definition: utilities.cc:499
TString roundNumber(double num, int decimals, double denom=1.)
Definition: utilities.cc:478