babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
stageout.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import os, glob
3 
4 # ------------------------------------------------------------------------------------------
5 # This script can be used to "manually" download babies that were produced ok, but failed
6 # to actually be transferred to our Tier 3.
7 # Assumption 1: all the failed transfers are due to one bad site. If we see that there
8 # are often >1 bad sites, this can be modified.
9 # Assumption 2: at least one file made it to hadoop.
10 # It is ok to use even if the assumptions are wrong, it will just not find the files
11 # and you will get a message "FAIL" in red.
12 #
13 # It seems to take ~ 30-50s per file download
14 #
15 # 1. from the web task monitoring interface open one log file of the type "Job"
16 # and find the origin PFN, listed on the line starting with "Stage Out Successful"
17 # enter the result here as origin, stripping anything including and after the dataset name
18 #
19 # 2. in the same file, find the line starting with: "JOB AD: CRAB_Destination"
20 # and check the destination header, likely as the example, but with your CERN username
21 #
22 # 3. again looking at the web monitoring interface,
23 # get the failed task names (without the "$USER_") and the number of jobs
24 # (regardless of status), and enter these in the dictionary named 'tasks' below.
25 # ------------------------------------------------------------------------------------------
26 
# Origin and destination SE headers -- replace these examples with the values
# read from the job log (see instructions above).
origin = 'srm://heplnx204.pp.rl.ac.uk:8443/srm/managerv2?SFN=/pnfs/pp.rl.ac.uk/data/cms/store/temp/user/ana.91d36fddf73016fe56a4674b87cd86f61feae489/'
dest = 'srm://cms25.physics.ucsb.edu:8443/srm/v2/server?SFN=/mnt/hadoop/cms/store/user/ana/'

# Map of failed CRAB task name (without the "$USER_" prefix) -> total number
# of jobs in that task, regardless of job status.
tasks = {
    'crab_SMS-T1tttt_mGluino-600_mLSP-250to325_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15FSPremix-MCRUN2_74_V9-v1__MINIAODSIM': 11,
    'crab_SMS-T1tttt_mGluino-1150to1175_mLSP-750to925_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15FSPremix-MCRUN2_74_V9-v1__MINIAODSIM': 10,
    'crab_SMS-T1tttt_mGluino-1400_mLSP-1to1175_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15FSPremix-MCRUN2_74_V9-v1__MINIAODSIM': 11,
    'crab_SMS-T1tttt_mGluino-1425to1450_mLSP-1to1200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15FSPremix-MCRUN2_74_V9-v1__MINIAODSIM': 11,
    'crab_SMS-T1tttt_mGluino-1800to1850_mLSP-1to1450_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15FSPremix-MCRUN2_74_V9-v1__MINIAODSIM': 13,
    'crab_TTJets_HT-800to1200_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15DR74-Asympt25ns_MCRUN2_74_V9-v1__MINIAODSIM': 25,
    'crab_TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8__RunIISpring15DR74-Asympt25ns_MCRUN2_74_V9-v1__MINIAODSIM': 70,
}
41 
42 # In general there should be no need to modify anything below...
43 
44 for task in tasks.keys():
45  print '\n' + 20*'=--' + '\n'
46  print "Working on task:", task
47  print "Number of jobs:", tasks[task]
48  # find what is already on hadoop
49  hadoop_base = dest.split('SFN=').pop()
50  good_files = glob.glob(hadoop_base + task.replace('crab_','').split('__')[0]+"/" + task + "/*/*/*root")
51  # see if there were multiple subdirectories in the time-stamped directory
52  runs = set([i.split('/')[-2] for i in good_files]) # put in a set to remove repeating entries
53  if (len(runs)>1): print "Found multiple runs ", len(runs)
54 
55  # print info on files on hadoop
56  if (len(good_files)>0):
57  print "Found %i files:" % len(good_files)
58  # for i in good_files: print i
59  else:
60  print "No files found on hadoop"
61  print "\033[91m -- FAIL\033[0m", task
62  continue
63 
64  # download files
65  ndls = 0
66  for run in runs:
67  # find a template for a file name corresponding to this run
68  for igf in good_files:
69  if (run==(igf.split('/')[-2])): template = igf
70  break
71  # loop over job ids
72  for job in range(1,tasks[task]+1):
73  abspath = template[0:template.rfind('_')] + '_'+str(job)+'.root'
74  relpath = abspath.replace(hadoop_base,'')
75  # do we already have the file?
76  if abspath in good_files: continue
77 
78  cmd = 'lcg-cp --verbose --vo=cms -b -D srmv2 '+ origin + relpath + ' ' + dest + relpath
79  exitcode = os.system(cmd)
80  if (exitcode==0): ndls = ndls + 1
81  else: print "ERROR:: Download failed."
82 
83  if (tasks[task] == (len(good_files) + ndls)):
84  print "\033[92m -- SUCCESS:\033[0m", task
85  else:
86  print "\033[91m -- FAIL\033[0m", task
87