babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
sub_cond.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import os, sys, subprocess
4 import pprint
5 import glob
6 import json
7 import string
8 import time
9 import argparse
10 
#What to submit? Use substrings that would be found in the desired dataset, no wild cards!
# e.g. if we want only 25ns TTJets, use a substring that contains it all:
# "TTJets_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns"
# if we want all the HT-binned TTJets append:
# "TTJets_HT-"
mc_wishlist = [
    ## Signal scan, 81M events
    "SMS-T1tttt_mGluino",

    ## TTJets, 170M events
    "TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_HT-1200to2500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_HT-2500toInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_HT-600to800_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_HT-800to1200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTJets_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",

    ## QCD, 85M events
    "QCD_HT1000to1500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "QCD_HT1500to2000_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "QCD_HT2000toInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "QCD_HT200to300_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "QCD_HT500to700_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "QCD_HT700to1000_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",

    ## Other MC, 95M events
    "DYJetsToLL_M-50_HT-100to200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "DYJetsToLL_M-50_HT-200to400_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "DYJetsToLL_M-50_HT-400to600_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "DYJetsToLL_M-50_HT-600toInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "ST_t-channel_antitop_4f_leptonDecays_13TeV-powheg-pythia8_TuneCUETP8M1_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "ST_t-channel_top_4f_leptonDecays_13TeV-powheg-pythia8_TuneCUETP8M1_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "ST_tW_top_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTGJets_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTTT_TuneCUETP8M1_13TeV-amcatnlo-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTWJetsToQQ_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTZToLLNuNu_M-10_TuneCUETP8M1_13TeV-amcatnlo-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "TTZToQQ_TuneCUETP8M1_13TeV-amcatnlo-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WJetsToLNu_HT-100To200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WJetsToLNu_HT-200To400_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WJetsToLNu_HT-400To600_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WJetsToLNu_HT-600ToInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WWTo2L2Nu_13TeV-powheg_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WWToLNuQQ_13TeV-powheg_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9",
    "ttHJetTobb_M125_13TeV_amcatnloFXFX_madspin_pythia8_RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9_ext3",
]

# Data primary datasets to process; all disabled by default --
# uncomment an append line to enable that PD.
data_wishlist = []
#data_wishlist.append("JetHT")
#data_wishlist.append("HTMHT")
#data_wishlist.append("MET")
#data_wishlist.append("SingleElectron")
#data_wishlist.append("SingleMuon")
#data_wishlist.append("DoubleEG")
#data_wishlist.append("DoubleMuon")
75 
# JSON files defining the golden-run sub-periods for data.
jsonlist = glob.glob("data/json/subgolden_*.json")

# For data, read the golden runs: map each subperiod json path to its run numbers.
goldruns = {}
for jsonpath in jsonlist:
    with open(jsonpath) as jfile:
        golden = json.load(jfile)
    # JSON object keys are run numbers stored as strings; convert to int.
    goldruns[jsonpath] = [int(run) for run in golden.keys()]
85 
# Maximum number of input MINIAOD files per condor job (asked interactively).
maxfiles = int(raw_input('Enter max number of files per job: '))

# These keys, one for mc and one for each data period, are used to split the name of a dataset in two parts,
# if the substring preceeding the key is the same for two datasets,
# they are considered extensions of each other and combined
# i.e. the output babies and logs are labeled by 'substring-before-key'+'key'
# the sub-string following the key is dropped and forgotten!
comb_keys = [
    'RunIISpring15DR74_Asympt25ns_MCRUN2_74_V9',
    'RunIISpring15FSPremix_MCRUN2_74_V9',
    'RunIISpring15MiniAODv2_74X_mcRun2_asymptotic_v2',
    'Run2015D',
]

# Test-mode knobs; for a production submission leave all three at -1.
maxjobs = -1
maxds = -1
maxevents_perjob = -1
istest = 'n'
if maxjobs != -1 or maxevents_perjob != -1 or maxds != -1:
    # Any non-default knob means a test run: ask for explicit confirmation.
    istest = raw_input("Running in test mode with %i jobs, %i events per job, over %i datasets. Enter 'y' to continue: " % (maxjobs, maxevents_perjob, maxds))
    if istest != 'y':
        sys.exit("No jobs submitted. Edit sub_cond.py to exit test mode.")
105 
106 # Only matters if running on UCSD:
107 # To run on multiple T2's use, e.g:
108 whitelist = "T2_US_UCSD,T2_US_WISCONSIN,T2_US_FLORIDA,T2_US_PURDUE,T2_US_NEBRASKA,T2_US_CALTECH"
109 # whitelist = "T2_US_WISCONSIN,T2_US_FLORIDA,T2_US_PURDUE,T2_US_NEBRASKA,T2_US_CALTECH"
110 # Need to check which is better, running on remote T2 or xrootd-ing the data...
111 # To run only at UCSD use:
112 # whitelist = "T2_US_UCSD"
113 
114 # Condor set up depends on whether we run on UCSB or UCSD
115 host = os.environ.get("HOSTNAME")
116 if "ucsd" in host: host = "sd"
117 elif ("compute" in host) or ( "physics.ucsb.edu" in host) : host = "sb"
118 else: sys.exit("\033[91mERROR: Unknown host: "+host+" Exit. \033[0m")
119 print "INFO: Setting up job submission at",('UCSB.' if host=='sb' else 'UCSD.')
120 hadoop = '/mnt/hadoop/cms'
121 if host=="sd": hadoop = '/hadoop/cms/phedex'
122 
123 # Job submission should be done from the babymaker directory, which is under a valid CMSSW release
124 codedir = os.getcwd()
125 if not ("/src/babymaker") in codedir:
126  print "\033[91mERROR: Please submit from path consistent with: .../src/babymaker/ \033[0m\n"
127  sys.exit(0)
128 
129 # Need a valid proxy to submit condor jobs at UCSD
130 # or for fallback at UCSB
131 proxy,valid = "",""
132 proc = subprocess.Popen('voms-proxy-info', stdout=subprocess.PIPE)
133 tmp = proc.stdout.read()
134 if "Proxy not found" in tmp:
135  sys.exit("\033[91mERROR: Proxy not found. \033[0m")
136 elif ('timeleft' in tmp) and ('0:00:00' in tmp):
137  sys.exit("\033[91mERROR: Proxy expired. \033[0m")
138 else:
139  for info in tmp.splitlines():
140  if ("/tmp/x509" in info): proxy = "/tmp/x509"+(string.split(info,"/tmp/x509"))[1]
141  if ("timeleft" in info): valid = "Time left before proxy expires: "+info.split()[-1]
142 
143 print "INFO: Found proxy path",proxy
144 print "INFO:",valid
145 
146 # Default output directory is the "out" sub-directory of the current working directory.
147 outdir = os.getcwd()+'/out/'
148 sub_time = time.strftime("%y%m%d_%H%M%S", time.gmtime())
149 if not (os.path.exists(os.path.realpath(outdir))):
150  sys.exit("\033[91m ERROR: Directory "+outdir+" does not exist. Please either create a sym link or dir. \033[0m")
151 outdir = os.path.join(os.path.realpath(outdir),sub_time)
152 os.mkdir(outdir)
153 
154 logdir = os.path.join(os.getcwd(),"logs", sub_time)
155 if not os.path.exists(logdir):
156  os.makedirs(logdir)
157 print "INFO: Babies will be written to: ", outdir
158 print "INFO: Logs will be written to: ", logdir
159 
160 # this is where the condor submission script and job executable are stored
161 if not (os.path.exists(os.getcwd()+'/run')):
162  os.mkdir(os.getcwd()+'/run')
163 rundir = os.path.join(os.path.realpath(os.getcwd()+'/run'),sub_time)
164 os.mkdir(rundir)
165 print rundir
166 # read in datasets to run over based on the flist_*txt files
167 # where to find the flists
168 flistdir = os.path.join(os.getenv("CMSSW_BASE"),"src/flists/")
169 if not os.path.exists(flistdir):
170  sys.exit("ERROR: flists repository not found.")
171 
172 files_dict = {}
173 nent_dict = {}
174 flists_pd = glob.glob(os.path.join(flistdir,"flist*.txt"))
175 for fnm in flists_pd:
176  if any(wish in fnm for wish in mc_wishlist):
177  dsname = ''
178  if any(ikey in fnm for ikey in comb_keys):
179  for ikey in comb_keys:
180  if ikey in fnm:
181  dsname = string.split(string.split(fnm,"flist_").pop(),ikey)[0] + ikey
182  break
183  else:
184  sys.exit("ERROR: None of the combination keys (%s) were found in this flist:%s\n" % (comb_keys,fnm))
185 
186  print "INFO: Adding PD: ",fnm.replace("flist_","").replace(".txt","")
187 
188  if dsname not in files_dict.keys():
189  nent_dict[dsname] = 0
190  files_dict[dsname] = []
191  with open(fnm) as f:
192  for line in f:
193  if ("nEventsTotal" in line): # this is read instead of calculated to make resubmission simpler
194  nent_dict[dsname] = nent_dict[dsname] + int(line.split().pop())
195  if "/store" not in line: continue
196  col = line.split()
197  files_dict[dsname].append(col[2])
198 
# form new datasets from the data split into subperiods
for pd in data_wishlist:
    # book the dataset names for all sub-periods in advance
    # (loop variable must not be called 'json' -- that would shadow the json module)
    for jsonpath in jsonlist:
        dsname = pd + jsonpath.replace('data/json/subgolden','').replace('.json','')
        files_dict[dsname] = []
        nent_dict[dsname] = 0 # not filled for data
    # read flists
    flists_pd = glob.glob(os.path.join(flistdir,"flist_"+pd+"_Run2015D*.txt"))
    for fnm in flists_pd:
        with open(fnm) as f:
            for line in f:
                if "/store" not in line: continue
                col = line.split()
                # col[3] is a comma-separated list of run numbers in this file.
                runlist = [int(irun) for irun in col[3].split(",")]
                # Assign the file to every subperiod that contains one of its runs.
                for run in runlist:
                    for jsonfile in goldruns.keys():
                        if run in goldruns[jsonfile]:
                            dsname = pd + jsonfile.replace('data/json/subgolden','').replace('.json','')
                            if col[2] not in files_dict[dsname]: # don't add same file twice if it has two runs in this subperiod
                                files_dict[dsname].append(col[2])
220 
221 # If on UCSD prep also tarball
222 if (host=="sd"):
223  print "INFO: Creating babymaker tarball to transfer to work node..."
224  tarcmd = "tar --directory=../ --exclude=\"out\" --exclude=\"run\""
225  tarcmd += " --exclude=\"logs\" --exclude=\"bmaker/interface/release.hh\""
226  tarcmd += " --exclude=\"data/csc_beamhalo_filter/*\""
227  tarcmd += " --exclude=\".git\""
228  tarcmd += " -c babymaker | xz > ../babymaker.tar.xz"
229  os.system(tarcmd)
230 
231 total_jobs = 0
232 for ids, ds in enumerate(sorted(files_dict.keys())):
233  if (maxds!=-1 and ids>=maxds): break
234 
235  #release
236  cmssw = "CMSSW_7_4_6_patch6"
237  if ("Run2015" in ds) or ("RunIISpring15MiniAODv2" in ds): cmssw = "CMSSW_7_4_14"
238 
239  # with the list of files in hand, determine the number of condor jobs
240  nfiles = len(files_dict[ds])
241  njobs = maxjobs
242  if (maxjobs==-1): njobs = (nfiles/maxfiles) if (nfiles%maxfiles==0) else (nfiles/maxfiles+1)
243 
244  for job in range(0,njobs):
245  # name the baby
246  bname = "_".join(["baby",ds,"mf"+str(maxfiles),"batch"+str(job)])
247  print("INF0: "+bname)
248 
249  # check if job had already succeeded on previous submission
250  outpath = os.path.join(outdir,bname+".root")
251  if os.path.exists(outpath):
252  print "\033[38m WARNING: "+outpath+" already exists. Skip job submission. \033[0m"
253  continue
254 
255  # list of arguments to the cmsRun job
256  condor_args = []
257  condor_args.append("nEvents="+str(maxevents_perjob))
258  condor_args.append("nEventsSample="+str(nent_dict[ds]))
259  condor_args.append("inputFiles=\\\n"+",\\\n".join(files_dict[ds][(job*maxfiles):((job+1)*maxfiles)]))
260  if (host=="sb"): condor_args.append("outputFile="+outpath)
261  else: condor_args.append("outputFile="+bname+".root")
262  condor_args.append("condorSubTime="+sub_time)
263  if ("Run2015D" in ds):
264  json_name = "data/json/subgolden_Run2015D" + ds.split("Run2015D").pop() + ".json"
265  if (json_name not in jsonlist): sys.exit("ERROR: Could not find json!")
266  condor_args.append("json=babymaker/"+json_name)
267 
268  # Create executable that will be transfered to the work node by condor
269  exefile =rundir+"/"+bname+".sh"
270  fexe = open(exefile,"w")
271  if (host=="sb"):
272  fexe.write("#! /bin/bash\n")
273  fexe.write("source /cvmfs/cms.cern.ch/cmsset_default.sh\n")
274  fexe.write("cd "+codedir+"\n")
275  fexe.write("eval `scramv1 runtime -sh`\n")
276  fexe.write("export ORIGIN_USER="+os.getenv("USER")+"\n")
277  fexe.write("cmsRun bmaker/python/bmaker_basic_cfg.py \\\n"+" \\\n".join(condor_args)+"\n")
278  else:
279  fexe.write("#! /bin/bash\n")
280  fexe.write("source /code/osgcode/cmssoft/cmsset_default.sh\n")
281  fexe.write("export SCRAM_ARCH=slc6_amd64_gcc491\n")
282  fexe.write("eval `scramv1 project CMSSW "+cmssw+"`\n")
283  fexe.write("cd "+cmssw+"/src\n")
284  fexe.write("eval `scramv1 runtime -sh`\n")
285  fexe.write("export ORIGIN_USER="+os.getenv("USER")+"\n")
286  fexe.write("tar -xf ../../babymaker.tar.xz\n")
287  fexe.write("cd babymaker\n")
288  fexe.write("./compile.sh\n")
289  fexe.write("cmsRun bmaker/python/bmaker_basic_cfg.py \\\n"+" \\\n".join(condor_args)+"\n")
290  fexe.write("echo \"cmsRun exit code \"$?\n")
291  #fexe.write("lcg-cp -b -D srmv2 --vo cms -t 2400 --verbose file:"+bname+".root srm://bsrm-3.t2.ucsd.edu:8443/srm/v2/server?SFN="+outpath+"\n")
292  if "T1tttt" in bname:
293  fexe.write("./bmaker/genfiles/run/skim_scan_onefile.exe "+bname+".root\n")
294  fexe.write("for i in $(ls *.root); do\n")
295  fexe.write("\tlcg-cp -b -D srmv2 --vo cms -t 2400 --verbose file:$i srm://bsrm-3.t2.ucsd.edu:8443/srm/v2/server?SFN="+outdir+"/$i\n")
296  fexe.write("done\n")
297  fexe.write("cd ../../..\n")
298  fexe.write("rm -rf "+cmssw+"\n")
299  fexe.close()
300  os.system("chmod u+x "+exefile)
301 
302  # Create condor submission cmd file
303  cmdfile = rundir+"/"+bname+".cmd"
304  print "cmdfile is "+ cmdfile
305  fcmd = open(cmdfile,"w")
306  if (host=="sb"):
307  fcmd.write("Executable = "+exefile+"\n")
308  fcmd.write("Universe = vanilla\n")
309  # send proxy even for local submissions
310  # in case fallback is necessary
311  fcmd.write("use_x509userproxy = True\n")
312  fcmd.write("x509userproxy="+proxy+"\n")
313  fcmd.write("Log = "+logdir+ "/"+bname+".log\n")
314  fcmd.write("output = "+logdir+"/"+bname+".out\n")
315  fcmd.write("error = "+logdir+"/"+bname+".err\n")
316  fcmd.write("Notification = never\n")
317  fcmd.write("Queue\n")
318  else:
319  fcmd.write("Universe = grid\n")
320  fcmd.write("Grid_Resource = condor cmssubmit-r1.t2.ucsd.edu glidein-collector.t2.ucsd.edu\n")
321  fcmd.write("use_x509userproxy = True\n")
322  fcmd.write("x509userproxy="+proxy+"\n")
323  fcmd.write("+remote_DESIRED_Sites=\""+whitelist+"\"\n")
324  fcmd.write("Executable = "+exefile+"\n")
325  fcmd.write("Transfer_Executable = True\n")
326  fcmd.write("should_transfer_files = YES\n")
327  fcmd.write("transfer_input_files = ../babymaker.tar.xz\n")
328  fcmd.write("Notification = Never\n")
329  fcmd.write("Log = "+logdir+"/"+bname+".log\n")
330  fcmd.write("output = "+logdir+"/"+bname+".out\n")
331  fcmd.write("error = "+logdir+"/"+bname+".err\n")
332  fcmd.write("queue 1\n")
333  fcmd.close()
334  total_jobs = total_jobs + 1
335 
336 # Submit condor job
337 if host=="sb":
338  cmd = "ssh cms25.physics.ucsb.edu condor_submit "
339 else:
340  cmd = "condor_submit "
341  print "INFO: Submitting", cmdfile
342 
343 # for the sake of efficiency, submit all jobs at once
344 if host=="sb":
345  os.system("scp " + proxy + " cms25.physics.ucsb.edu:/tmp")
346 os.system("cat " + rundir + "/baby*.cmd > " + rundir + "/submit_all.cmd")
347 os.system(cmd + rundir + "/submit_all.cmd")
348 print "Submitted ", total_jobs, "jobs"