babymaker  e95a6a9342d4604277fe7cc6149b6b5b24447d89
slim_ntuple.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 from __future__ import print_function
4 
5 import sys
6 import argparse
7 import fnmatch
8 import os
9 import utilities
10 
11 import ROOT
12 
def getRules(slim_file_name):
    """Read the keep/drop rules from a slimming rules file.

    Each line is split on whitespace. Blank lines and lines whose first
    token starts with "#" are ignored. A line is a valid rule when it has
    at least two tokens and the first one is "keep" or "drop"; every other
    line is reported through utilities.ePrint and skipped.

    Args:
        slim_file_name: Path of the rules file.

    Returns:
        List of token lists, in file order, one per valid rule. Element 0
        is "keep" or "drop" and element 1 is the branch-name pattern.
    """
    # Close the file deterministically rather than leaving the handle open.
    with open(slim_file_name) as slim_file:
        rules = [line.split() for line in slim_file]

    good_rules = []
    for rule in rules:
        if not rule or rule[0].startswith("#"):
            # Blank or comment line. Comments must never be returned as
            # rules: a comment such as "# * ..." has two or more tokens and
            # would otherwise be matched against branch names via its
            # second token.
            continue
        if len(rule) >= 2 and rule[0] in ("keep", "drop"):
            good_rules.append(rule)
        else:
            utilities.ePrint("Invalid rule:", rule, "\n")
    return good_rules
24 
def passRules(branch, rules):
    """Decide whether a branch survives the given keep/drop rules.

    Every rule whose pattern (element 1) matches the branch name under
    fnmatch.fnmatch is considered, and the last matching rule wins.

    Args:
        branch: Name of the branch to test.
        rules: Sequence of rules; element 0 is the action and element 1 is
            the shell-style pattern.

    Returns:
        True when no rule matches or the last matching rule's action is
        "keep"; False otherwise.
    """
    # Branches are kept by default, so start from "keep" and let each
    # matching rule override the verdict in order.
    verdict = "keep"
    for rule in rules:
        if fnmatch.fnmatch(branch, rule[1]):
            verdict = rule[0]
    return verdict == "keep"
28 
def sortInputFilesBySize(input_file_names):
    """Return the input file names ordered from largest to smallest file.

    Sizes are read with os.path.getsize, once per file. Files of equal
    size keep their relative input order.

    Args:
        input_file_names: Iterable of file paths.

    Returns:
        New list of the same paths sorted by decreasing size.
    """
    return sorted(input_file_names, key=os.path.getsize, reverse=True)
37 
def slimNtuple(slim_file_name, output_file_name, input_file_names, keep_existing, test_mode):
    """Chain the input ntuples, apply the keep/drop rules and write the result.

    The input files are added, largest first, to a ROOT.TChain named "tree".
    Each branch of the chain is tested with passRules against the rules read
    by getRules; the kept and dropped branch lists are printed. Unless running
    in test mode, the branch statuses are set accordingly and the chain is
    merged into the output file with the "fast keep" option.

    Args:
        slim_file_name: Path of the rules file.
        output_file_name: Path of the output file (opened in "recreate" mode).
        input_file_names: Paths of the input ntuple files.
        keep_existing: If true and the output file already exists, return
            without doing anything else.
        test_mode: If true, stop after printing the kept and dropped branches.
    """
    print(" INPUT FILES:",input_file_names,"\n")
    print(" OUTPUT FILE:",output_file_name,"\n")
    print(" RULES FILE:",slim_file_name,"\n")

    if keep_existing and os.path.exists(output_file_name):
        print("Keeping pre-existing "+output_file_name+"\n")
        return

    chain = ROOT.TChain("tree", "tree")
    for path in sortInputFilesBySize(input_file_names):
        chain.Add(path)

    all_branches = [ br.GetName() for br in chain.GetListOfBranches() ]
    rules = getRules(slim_file_name)

    # Partition the branches in a single pass, then sort each side for display.
    kept, dropped = [], []
    for name in all_branches:
        if passRules(name, rules):
            kept.append(name)
        else:
            dropped.append(name)
    kept.sort()
    dropped.sort()

    print("DROPPED BRANCHES:",dropped,"\n")
    print(" KEPT BRANCHES:",kept,"\n")
    if test_mode:
        return

    # Enable exactly the kept branches; every other branch is disabled.
    kept_lookup = set(kept)
    for name in all_branches:
        chain.SetBranchStatus(name, name in kept_lookup)

    with utilities.ROOTFile(output_file_name, "recreate") as out_file:
        chain.Merge(out_file, 0, "fast keep")
71 
if __name__ == "__main__":
    # Command-line entry point: parse the options, then slim and merge the
    # input ntuples into a single output file.
    # NOTE(review): the help strings contain typos ("diplaying", "branchs",
    # "are are"); they are left byte-identical here.
    arg_parser = argparse.ArgumentParser(
        description="Prunes branches from an ntuple",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser.add_argument(
        "-t", "--test", action="store_true",
        help="Run in test mode, quickly diplaying the list of kept and dropped branchs "
             "without actually copying the trees.")
    arg_parser.add_argument(
        "-k", "--keep_existing", action="store_true",
        help="Do not overwrite output file if it already exists.")
    arg_parser.add_argument(
        "slim_file",
        help="File containing rules for pruning branches (one rule per line). "
             "Rules are are the form \"keep XXX\" or \"drop YYY\". "
             "Unix shell-style wildcards (e.g., '*') allow pattern matching. "
             "Branches are kept by default if no matching rule is found for the branch. "
             "If multiple rules match, the last takes precedence.")
    arg_parser.add_argument(
        "output_file",
        help="File in which to save the slimmed and merged ntuple.")
    arg_parser.add_argument(
        "input_files", nargs="+",
        help="Files containing ntuples to be slimmed and merged.")
    options = arg_parser.parse_args()

    slimNtuple(options.slim_file, options.output_file, options.input_files,
               options.keep_existing, options.test)
def passRules(branch, rules)
Definition: slim_ntuple.py:25
def getRules(slim_file_name)
Definition: slim_ntuple.py:13
def sortInputFilesBySize(input_file_names)
Definition: slim_ntuple.py:29
def ePrint(args, kwargs)
Definition: utilities.py:44
def slimNtuple(slim_file_name, output_file_name, input_file_names, keep_existing, test_mode)
Definition: slim_ntuple.py:38