19 #include "TDirectory.h" 20 #include "TMVA/Tools.h" 21 #include "TMVA/Factory.h" 33 int main(
int argc,
char *argv[]){
35 if(argc > 2) tag = argv[2];
38 string hostname =
execute(
"echo $HOSTNAME");
40 bfolder =
"/net/cms2";
42 TString ntufolder=bfolder+
"/cms2r0/babymaker/babies/2015_01_30/small_tree/skim_ht500met200/";
43 TString bdtfolder=bfolder+
"/cms2r0/babymaker/babies/2015_01_30/small_tree/bdt"+tag+
"/";
45 if(argc > 1)
trainBDT(ntufolder, bdtfolder);
53 TString
lsp =
"{#lower[-0.1]{#tilde{#chi}}#lower[0.2]{#scale[0.85]{^{0}}}#kern[-1.3]{#scale[0.85]{_{1}}}}";
54 TString t1t_label =
"#tilde{g}#kern[0.2]{#tilde{g}}, #tilde{g}#rightarrowt#kern[0.18]{#bar{t}}#kern[0.18]"+
lsp;
55 vector<TString> v_t1t_tt_htmj_30;
56 v_t1t_tt_htmj_30.push_back(bdtfolder+
"bdt_T1tttt1500_TTJet_htmj_30.root");
57 vector<sample_class> t1t_tt_htmj_30;
58 t1t_tt_htmj_30.push_back(
sample_class(t1t_label+
" (1500,100)", v_t1t_tt_htmj_30));
59 t1t_tt_htmj_30.push_back(
sample_class(
"t#bar{t}", v_t1t_tt_htmj_30));
61 vector<TString> v_t1t_tt_htnjets30;
62 v_t1t_tt_htnjets30.push_back(bdtfolder+
"bdt_T1tttt1500_TTJet_htnjets30.root");
63 vector<sample_class> t1t_tt_htnjets30;
64 t1t_tt_htnjets30.push_back(
sample_class(t1t_label+
" (1500,100)", v_t1t_tt_htnjets30));
65 t1t_tt_htnjets30.push_back(
sample_class(
"t#bar{t}", v_t1t_tt_htnjets30));
67 vector<TString> v_t1t_tt_njets30mj_30;
68 v_t1t_tt_njets30mj_30.push_back(bdtfolder+
"bdt_T1tttt1500_TTJet_njets30mj_30.root");
69 vector<sample_class> t1t_tt_njets30mj_30;
70 t1t_tt_njets30mj_30.push_back(
sample_class(t1t_label+
" (1500,100)", v_t1t_tt_njets30mj_30));
71 t1t_tt_njets30mj_30.push_back(
sample_class(
"t#bar{t}", v_t1t_tt_njets30mj_30));
74 vector<TString> v_t1tc_tt_htmj_30;
75 v_t1tc_tt_htmj_30.push_back(bdtfolder+
"bdt_T1tttt1200_TTJet_htmj_30.root");
76 vector<sample_class> t1tc_tt_htmj_30;
77 t1tc_tt_htmj_30.push_back(
sample_class(t1t_label+
" (1200,800)", v_t1tc_tt_htmj_30));
78 t1tc_tt_htmj_30.push_back(
sample_class(
"t#bar{t}", v_t1tc_tt_htmj_30));
80 vector<TString> v_t1tc_tt_htnjets30;
81 v_t1tc_tt_htnjets30.push_back(bdtfolder+
"bdt_T1tttt1200_TTJet_htnjets30.root");
82 vector<sample_class> t1tc_tt_htnjets30;
83 t1tc_tt_htnjets30.push_back(
sample_class(t1t_label+
" (1200,800)", v_t1tc_tt_htnjets30));
84 t1tc_tt_htnjets30.push_back(
sample_class(
"t#bar{t}", v_t1tc_tt_htnjets30));
86 vector<TString> v_t1tc_tt_njets30mj_30;
87 v_t1tc_tt_njets30mj_30.push_back(bdtfolder+
"bdt_T1tttt1200_TTJet_njets30mj_30.root");
88 vector<sample_class> t1tc_tt_njets30mj_30;
89 t1tc_tt_njets30mj_30.push_back(
sample_class(t1t_label+
" (1200,800)", v_t1tc_tt_njets30mj_30));
90 t1tc_tt_njets30mj_30.push_back(
sample_class(
"t#bar{t}", v_t1tc_tt_njets30mj_30));
93 int ht_col(kRed+1), mj_col(kAzure+2), col2(kGreen+1), col3(1), col4(kOrange+7);
94 int mj_style(8);
float mj_size(2.);
95 vector<marker_class> mj_points, ht_points, nj_points;
96 mj_points.push_back(
marker_class(200, mj_size, mj_col, mj_style));
98 mj_points.push_back(
marker_class(600, mj_size, mj_col, mj_style));
99 mj_points.push_back(
marker_class(800, mj_size, mj_col, mj_style));
102 ht_points.push_back(
marker_class(1500, mj_size, ht_col, mj_style));
103 ht_points.push_back(
marker_class(2000, mj_size, ht_col, mj_style));
105 nj_points.push_back(
marker_class(6, mj_size, col4, mj_style));
107 nj_points.push_back(
marker_class(11, mj_size, col4, mj_style));
111 TString mj_s(
"M#lower[-.1]{_{J}}"), nj_s(
"N#lower[-.1]{_{jets}}"), ht_s(
"H#lower[-.1]{_{T}}");
112 vector<var_class> mj_t1t_tt;
113 mj_t1t_tt.push_back(
var_class(t1t_tt_njets30mj_30,
"BDT",0.32, -0.5,
"BDT["+mj_s+
", "+nj_s+
"]",col3,7));
114 mj_t1t_tt.push_back(
var_class(t1t_tt_htnjets30,
"BDT",0.3, -0.5,
"BDT["+ht_s+
", "+nj_s+
"]",col2,2));
115 mj_t1t_tt.push_back(
var_class(t1t_tt_htmj_30,
"BDT",0.23, -0.5,
"BDT["+ht_s+
", "+mj_s+
"]",kMagenta+1,7));
116 mj_t1t_tt.push_back(
var_class(t1t_tt_htmj_30,
"ht",4000,0,
""+ht_s+
"",ht_col,1,ht_points));
117 mj_t1t_tt.push_back(
var_class(t1t_tt_htmj_30,
"njets30",15,0,
""+nj_s+
"",col4,1,nj_points));
118 mj_t1t_tt.push_back(
var_class(t1t_tt_htmj_30,
"mj_30",2000,0,
""+mj_s+
"",mj_col,1,mj_points));
120 vector<var_class> mj_t1tc_tt;
121 mj_t1tc_tt.push_back(
var_class(t1tc_tt_njets30mj_30,
"BDT",0.24, -0.5,
"BDT["+mj_s+
", "+nj_s+
"]",col3,7));
122 mj_t1tc_tt.push_back(
var_class(t1tc_tt_htnjets30,
"BDT",0.24, -0.5,
"BDT["+ht_s+
", "+nj_s+
"]",col2,2));
123 mj_t1tc_tt.push_back(
var_class(t1tc_tt_htmj_30,
"BDT",0.23, -0.5,
"BDT["+ht_s+
", "+mj_s+
"]",kMagenta+1,7));
124 mj_t1tc_tt.push_back(
var_class(t1tc_tt_htmj_30,
"ht",4000,0,
""+ht_s+
"",ht_col,1,ht_points));
125 mj_t1tc_tt.push_back(
var_class(t1tc_tt_htmj_30,
"njets30",15,0,
""+nj_s+
"",col4,1,nj_points));
126 mj_t1tc_tt.push_back(
var_class(t1tc_tt_htmj_30,
"mj_30",2000,0,
""+mj_s+
"",mj_col,1,mj_points));
128 vector<TString> vs_sam;
129 vector<vector<var_class> > v_mj;
130 v_mj.push_back(mj_t1t_tt); vs_sam.push_back(
"t1t_tt");
131 v_mj.push_back(mj_t1tc_tt); vs_sam.push_back(
"t1tc_tt");
133 vector<TString> v_cuts;
134 v_cuts.push_back(
"ht>500&&met>200");
136 for(
unsigned icut(0); icut<v_cuts.size(); icut++){
137 for(
unsigned isam(0); isam<vs_sam.size(); isam++){
138 DrawROC(v_mj[isam], v_cuts[icut],
"mj_bdt_"+vs_sam[isam]);
145 void trainBDT(TString ntufolder, TString bdtfolder){
147 TString nTrain(
"20000"), nTest(
"0");
148 gSystem->mkdir(bdtfolder, kTRUE);
150 vector<bdtvar_class> v_htmj;
154 vector<bdtvar_class> v_htnjets;
156 v_htnjets.push_back(
bdtvar_class(
"njets30",
"N_{jets}",
'I'));
158 vector<bdtvar_class> v_njetsmj;
159 v_njetsmj.push_back(
bdtvar_class(
"njets30",
"N_{jets}",
'I'));
162 vector<bdtvar_class> v_njetsmjhtmet;
163 v_njetsmjhtmet.push_back(
bdtvar_class(
"mj_30",
"M_{J}",
'F'));
164 v_njetsmjhtmet.push_back(
bdtvar_class(
"ht",
"H_{T}",
'F'));
165 v_njetsmjhtmet.push_back(
bdtvar_class(
"njets30",
"N_{jets}",
'I'));
166 v_njetsmjhtmet.push_back(
bdtvar_class(
"met",
"MET",
'F'));
168 vector<TString> v_signal;
169 v_signal.push_back(
"*T1tttt*1500*");
170 v_signal.push_back(
"*T1tttt*1200*");
172 vector<TString> v_bkg;
173 v_bkg.push_back(
"*TTJet*");
175 vector<bdt_class> bdts;
176 for(
unsigned isig(0); isig < v_signal.size(); isig++){
177 for(
unsigned ibkg(0); ibkg < v_bkg.size(); ibkg++){
178 bdts.push_back(
bdt_class(v_htmj, v_signal[isig], v_bkg[ibkg]));
179 bdts.push_back(
bdt_class(v_htnjets, v_signal[isig], v_bkg[ibkg]));
180 bdts.push_back(
bdt_class(v_njetsmj, v_signal[isig], v_bkg[ibkg]));
185 for(
unsigned ibdt(0); ibdt < bdts.size(); ibdt++){
187 TFile tmvaFile(bdtfolder+bdts[ibdt].name+
".root",
"RECREATE");
189 TChain signal(
"tree"), ttbar(
"tree");
190 signal.Add(ntufolder+bdts[ibdt].signal);
191 ttbar.Add(ntufolder+bdts[ibdt].bkg);
194 TMVA::Factory *factory =
new TMVA::Factory(bdts[ibdt].name, &tmvaFile,
"!V:Silent:Color");
195 for(
unsigned ind(0); ind < bdts[ibdt].vars.size(); ind++)
196 factory->AddVariable(bdts[ibdt].vars[ind].variable, bdts[ibdt].vars[ind].name,
197 bdts[ibdt].vars[ind].unit, bdts[ibdt].vars[ind].type);
198 factory->AddSpectator(
"met := met",
"MET",
"GeV",
'F');
199 factory->AddSpectator(
"ht := ht",
"H_{T}",
"GeV",
'F');
200 factory->AddSpectator(
"mj_30 := mj_30",
"M_{J}",
"GeV",
'F');
201 factory->AddSpectator(
"nvmus10 := nvmus10",
"Number of veto muons",
"",
'I');
202 factory->AddSpectator(
"nvels10 := nvels10",
"Number of veto electrons",
"",
'I');
203 factory->AddSpectator(
"nmus := nmus",
"Number of muons",
"",
'I');
204 factory->AddSpectator(
"nels := nels",
"Number of electrons",
"",
'I');
205 factory->AddSpectator(
"njets30 := njets30",
"Number of 30 GeV jets",
"",
'I');
206 factory->AddSignalTree (&signal, 1.);
207 factory->AddBackgroundTree(&ttbar, 1.);
208 factory->SetSignalWeightExpression (
"weight");
209 factory->SetBackgroundWeightExpression(
"weight");
211 TCut mycuts(
""), mycutb(
"");
212 factory->PrepareTrainingAndTestTree( mycuts, mycutb,
"nTrain_Signal="+nTrain+
":nTrain_Background="+nTrain+
213 ":nTest_Signal="+nTest+
":nTest_Background="+nTest+
214 ":SplitMode=Random:NormMode=NumEvents:!V" );
215 factory->BookMethod( TMVA::Types::kBDT,
"BDT",
216 "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );
218 factory->TrainAllMethods();
219 factory->TestAllMethods();
220 factory->EvaluateAllMethods();
225 cout<<
"\t\t\t\t\t\t\t\t\t\t\tWritten "<<bdtfolder+bdts[ibdt].name+
".root"<<endl;
232 void DrawROC(vector<var_class> vars, TString cuts, TString
tag){
235 const int nbins(1000);
236 vector<vector<TH1D> > histos;
237 TString hname, totcut;
240 for(
unsigned var(0); var<vars.size(); var++){
241 vector<sample_class> samples = vars[var].samples;
242 for(
unsigned sam(0); sam < samples.size(); sam++){
244 for(
unsigned isam(0); isam < samples[sam].files.size(); isam++){
245 chain[sam] =
new TChain(
"TestTree");
246 int nfiles = chain[sam]->Add(samples[sam].files[isam]);
247 if(nfiles==0) cout<<samples[sam].files[isam]<<
" not found"<<endl;
249 histos.push_back(vector<TH1D>());
252 float minh(vars[var].minx), maxh(vars[var].maxx);
255 maxh = vars[var].minx;
257 hname =
"histo"; hname += sam; hname += var;
258 totcut =
"weight*("+cuts+
"&&"+samples[sam].cut+
"&&classID==";
259 totcut += sam; totcut +=
")";
260 histos[sam].push_back(TH1D(hname,
"",nbins,minh,maxh));
261 chain[sam]->Project(hname, vars[var].varname, totcut);
262 chain[sam]->Delete();
266 TH1D base_histo(
"base",
"",1,0.01,1.0);
267 base_histo.SetMinimum(0.0);
268 base_histo.SetMaximum(1.0);
269 base_histo.SetDirectory(0);
271 style.
setTitles(&base_histo, vars[0].samples[0].label+
" efficiency", vars[0].samples[1].label+
" efficiency",
272 "#scale[0.8]{#font[62]{CMS}} #scale[0.6]{#font[52]{Supplementary (Simulation)}}",
"#scale[0.8]{13 TeV}");
276 double legW = 0.2, legH = legSingle*vars.size();
277 TLegend
leg(legX, legY-legH, legX+legW, legY);
278 leg.SetTextSize(0.054); leg.SetFillColor(0); leg.SetFillStyle(0); leg.SetBorderSize(0);
279 leg.SetTextFont(132);
283 for(
unsigned var(0); var<vars.size(); var++){
284 graphs[var] =
MakeROC(histos[0][var], histos[1][var], vars[var].minx < vars[var].maxx, vars[var].cuts);
285 graphs[var].SetLineColor(vars[var].color);
286 graphs[var].SetLineStyle(vars[var].style);
287 if(vars[var].style==1) graphs[var].SetLineWidth(5);
288 else graphs[var].SetLineWidth(8);
289 leg.AddEntry(&(graphs[var]), vars[var].title,
"l");
290 graphs[var].Draw(
"lsame");
294 cuts.ReplaceAll(
".",
"");
295 cuts.ReplaceAll(
"(",
""); cuts.ReplaceAll(
"$",
""); cuts.ReplaceAll(
")",
"");
296 cuts.ReplaceAll(
"[",
""); cuts.ReplaceAll(
"]",
"");
297 cuts.ReplaceAll(
"/",
"_"); cuts.ReplaceAll(
"*",
""); cuts.ReplaceAll(
"&&",
"_");
298 cuts.ReplaceAll(
">=",
"ge"); cuts.ReplaceAll(
"<=",
"se");
299 cuts.ReplaceAll(
">",
"g"); cuts.ReplaceAll(
"<",
"s"); cuts.ReplaceAll(
"=",
"");
300 cuts.ReplaceAll(
"+",
"");
301 TString pname(
"plots/roc_"+tag+
"_"+cuts+
".pdf");
305 pname.ReplaceAll(
"roc_",
"log_roc_");
306 base_histo.SetMinimum(1e-4);
311 TGraph
MakeROC(TH1D &good, TH1D &bad,
const bool less_is_better, vector<marker_class> cuts){
312 const int nbins = good.GetNbinsX();
313 if(bad.GetNbinsX() !=
nbins)
throw logic_error(
"Mismatched number of bins");
318 const double good_tot = good.Integral(0, nbins+1);
319 const double bad_tot = bad.Integral(0, nbins+1);
320 int inibin(0), endbin(nbins+1), dbin(1);
unsigned icut(0);
326 for(
int bin = inibin; bin*dbin<=endbin*dbin; bin+=dbin){
327 const double good_pass = good.Integral(min(endbin,bin), max(endbin,bin));
328 const double bad_pass = bad.Integral(min(endbin,bin), max(endbin,bin));
329 const double x = good_pass/good_tot;
330 const double y = bad_pass/bad_tot;
331 graph.SetPoint(graph.GetN(), x, y);
334 if(icut<cuts.size()){
335 float edge(good.GetXaxis()->GetBinUpEdge(bin));
336 if((edge>=cuts[icut].cut&&!less_is_better) || (edge<=cuts[icut].cut&&less_is_better)){
337 marker.SetMarkerStyle(cuts[icut].style);marker.SetMarkerColor(cuts[icut].color);
338 marker.SetMarkerSize(cuts[icut].size);
339 if(x>0.01 && y>0.0001) marker.DrawMarker(x,y);
344 TString name(good.GetName());
347 graph.SetTitle(name);
352 var_class::var_class(vector<sample_class> isamples, TString ivarname,
float iminx,
float imaxx, TString ititle,
353 int icolor,
int istyle, vector<marker_class> icuts){
354 varname = ivarname; minx = iminx; maxx = imaxx; title = ititle;
356 color = icolor; style = istyle;
361 files = ifiles; label = ilabel; cut = icut;
365 cut=icut; size=isize; color=icolor; style=istyle;
375 for(
unsigned ind(0); ind <
vars.size(); ind++)
name +=
vars[ind].variable;
376 name.ReplaceAll(
"*",
"");
void trainBDT(TString ntufolder, TString bdtfolder)
bdt_class(std::vector< bdtvar_class > ivars, TString isignal, TString ibkg)
bool Contains(const std::string &text, const std::string &pattern)
std::string execute(const std::string &cmd)
int main(int argc, char *argv[])
void DrawROC(vector< var_class > vars, TString cuts, TString tag)
void setTitles(TH1 *h, TString xTitle="", TString yTitle="", TString Left="", TString Right="")
bdtvar_class(TString variable, TString name, char type, TString unit="GeV")
marker_class(float icut, float isize, int icolor, int istyle)
std::vector< bdtvar_class > vars
var_class(std::vector< sample_class > samples, TString ivarname, float iminx, float imaxx, TString ititle, int icolor, int istyle=1, std::vector< marker_class > icuts=std::vector< marker_class >())
TGraph MakeROC(TH1D &good, TH1D &bad, const bool less_is_better, vector< marker_class > cuts)
sample_class(TString ilabel, std::vector< TString > ifiles, TString icut="1")
void plotROC(TString bdtfolder)