susy_cfa  b611ccad937ea179f86a1f5663960264616c0a20
combine_datasets.cxx
Go to the documentation of this file.
1 // combine_datasets: Finds all unique events in a list of datasets
2 
3 #include <ctime>
4 
5 #include <vector>
6 #include <fstream>
7 #include <iostream>
8 #include <set>
9 #include <map>
10 #include <unistd.h> // getopt
11 #include <iomanip> // setw
12 
13 #include "TString.h"
14 #include "TChain.h"
15 #include "TTree.h"
16 #include "TFile.h"
17 #include "TSystem.h"
18 
19 #include "utilities.hpp"
20 
21 using namespace std;
22 
23 int main(int argc, char *argv[]){
24  time_t startTime, curTime;
25  time(&startTime);
26 
27  TString file_datasets("txt//datasamples/singlelep.txt"), infolder(""), outfolder("out/"), tag("");
28  int c(0);
29  while((c=getopt(argc, argv, "f:i:o:t:"))!=-1){
30  switch(c){
31  case 'i':
32  infolder=optarg;
33  break;
34  case 'o':
35  outfolder=optarg;
36  break;
37  case 't':
38  tag=optarg;
39  break;
40  case 'f':
41  file_datasets=optarg;
42  break;
43  default:
44  break;
45  }
46  }
47  if(file_datasets=="" || infolder==""){
48  cout<<endl<<"Specify input folder and datasets: "
49  <<"./run/combine_datasets.exe -i <infolder> -o <outfolder=out> -f <file_datasets=txt/datasamples/singlelep.txt>"<<endl<<endl;
50  return 1;
51  }
52 
53  vector<TString> datasets;
54  TString buffer, basename("Run"+tag);
55  ifstream indata(file_datasets);
56  while(indata){
57  indata >> buffer;
58  if(buffer!=""){
59  datasets.push_back(buffer);
60  basename += ("_"+buffer);
61  }
62  }
63 
64  map<int, set<Long64_t> > events;
65  int run;
66  Long64_t event;
67 
68  for(unsigned idata(0); idata < datasets.size(); idata++){
69  TChain chain("tree"), treeglobal("treeglobal");
70  TString filename(infolder+"/*"+datasets[idata]+"*"+tag+"*.root");
71  int files = chain.Add(filename);
72  if(files<1) {
73  cout<<"No files found for "<<filename<<endl;
74  continue;
75  }
76  treeglobal.Add(filename);
77  gSystem->mkdir(outfolder, kTRUE);
78  TString fulloutname(outfolder+"/baby_"+basename+"_");
79  fulloutname += idata; fulloutname += ".root";
80 
81  // Checking if output file exists
82  TString outname(fulloutname);
83  outname.ReplaceAll(outfolder, ""); outname.ReplaceAll("/", "");
84  vector<TString> outfiles = dirlist(outfolder, outname);
85  if(outfiles.size()>0) {
86  cout<<"File "<<fulloutname<<" exists. Exiting"<<endl;
87  return 1;
88  }
89 
90  TFile outfile(fulloutname, "RECREATE");
91  outfile.cd();
92 
93  TTree *outtree(chain.CloneTree(0));
94 
95  chain.SetBranchAddress("event", &event);
96  chain.SetBranchAddress("run", &run);
97 
98  long entries(chain.GetEntries());
99  // entries = 100000;
100 
101  cout<<endl<<"Doing "<<files<<" files in "<<filename<<" with "<<entries<<" entries"<<endl;
102  for(int entry(0); entry<entries; entry++){
103  chain.GetEntry(entry);
104  if(entry!=0 && entry%250000==0) {
105  time(&curTime);
106  double seconds(difftime(curTime,startTime));
107 
108  cout<<"Doing entry "<<setw(10)<<addCommas(static_cast<double>(entry))<<" of "<<addCommas(static_cast<double>(entries))
109  <<" Took "<<setw(6)<<seconds<<" seconds at "
110  <<setw(4)<<roundNumber(static_cast<double>(entry),1,seconds*1000.)<<" kHz"<<endl;
111  }
112 
113  if(events.find(run) == events.end()) events[run] = set<Long64_t>(); // New run
114  if(events[run].find(event) == events[run].end()){ // New event
115  events[run].insert(event);
116  outtree->Fill();
117  }
118  } // Loop over entries
119  outtree->Write();
120  treeglobal.CloneTree(-1,"fast");
121  outfile.Write();
122  outfile.Close();
123  time(&curTime);
124  cout<<"Took "<<difftime(curTime,startTime) <<" seconds to write "<<fulloutname<<endl;
125  time(&startTime);
126 
127  } // Loop over datasets
128 
129  // for(auto it = events.cbegin(); it != events.cend(); ++it) {
130  // cout << it->first <<", ";
131  // } // Needs c++11
132 
133  TString txtname(outfolder+"/runs_"+basename+".txt");
134  ofstream txtfile(txtname);
135  int prevrun(0);
136  for(map<int, set<Long64_t> >::const_iterator it = events.begin(); it != events.end(); ++it) {
137  run = it->first;
138  if(run/1000 != prevrun){
139  prevrun = run/1000;
140  txtfile<<endl;
141  }
142  txtfile << run << " ";
143  }
144  txtfile<<endl;
145  txtfile.close();
146  cout<<endl<<"Written run numbers in "<<txtname<<endl;
147 
148  cout<<endl<<endl;
149 
150  return 0;
151 }
string files
Definition: data_combine.py:33
string outfolder
Definition: data_combine.py:17
TString roundNumber(double num, int decimals, double denom=1.)
Definition: utilities.cpp:245
STL namespace.
int main(int argc, char *argv[])
string filename
Definition: data_combine.py:22
std::vector< TString > dirlist(const TString &folder, const TString &inname="dir", const TString &tag="")
Definition: utilities.cpp:182
TString addCommas(double num)
Definition: utilities.cpp:266