-
Notifications
You must be signed in to change notification settings - Fork 0
/
edmPickEvents.py
305 lines (274 loc) · 11.3 KB
/
edmPickEvents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#!/usr/bin/env python
# Anzar Afaq June 17, 2008
# Oleksiy Atramentov June 21, 2008
# Charles Plager Sept 7, 2010
# Volker Adler Apr 16, 2014
# Raman Khurana June 18, 2015
# Dinko Ferencek June 27, 2015
import os
import sys
import optparse
import re
import commands
from FWCore.PythonUtilities.LumiList import LumiList
import das_client
import json
from pprint import pprint
from datetime import datetime
# Free-form usage text describing the two ways of passing events (on the
# command line as run:lumi:event, or through a text file with one event per
# line and '#' comments).
# NOTE(review): this name shadows the builtin help(), and nothing in the
# visible code reads it (the option parser builds its own help) - confirm
# whether it is still needed.
help = """
How to use:
edmPickEvent.py dataset run1:lumi1:event1 run2:lumi2:event2
- or -
edmPickEvent.py dataset listOfEvents.txt
listOfEvents is a text file:
# this line is ignored as a comment
# since '#' is a valid comment character
run1 lumi_section1 event1
run2 lumi_section2 event2
For example:
# run lum event
46968 2 4
47011 105 23
47011 140 12312
run, lumi_section, and event are integers that you can get from
edm::Event(Auxiliary)
dataset: it just a name of the physics dataset, if you don't know exact name
you can provide a mask, e.g.: *QCD*RAW
For updated information see Wiki:
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents
"""
########################
## Event helper class ##
########################
class Event (dict):
    """Dictionary describing one event to pick: run, lumi, event and dataset.

    An instance is built from one line of text in which the three integers
    are separated by any mix of whitespace, colons and commas, for example
    ``"46968:2:4"`` or ``"46968 2 4"``.  The dataset is not read from the
    line; it is copied from the class attribute ``Event.dataset``, which has
    to be assigned before the first instance is created.

    The stored values can also be read as attributes (``event.run``).
    """

    # Dataset name shared by every event; filled in by the caller.
    dataset = None
    # Separators accepted between the run, lumi and event numbers.
    splitRE = re.compile (r'[\s:,]+')

    def __init__ (self, line, **kwargs):
        """Parse ``line`` into the 'run', 'lumi' and 'event' entries.

        Anything after the third number is ignored.  ``kwargs`` is accepted
        for backward compatibility and is not used.

        Raises:
            RuntimeError: if the line does not start with three integers,
                or if ``Event.dataset`` has not been set.
        """
        pieces = Event.splitRE.split (line.strip())
        try:
            # Too few pieces fail the unpacking and non-numeric pieces fail
            # int(); both surface as ValueError.
            run, lumi, event = [int (piece) for piece in pieces[:3]]
        except ValueError:
            raise RuntimeError ("Can not parse '%s' as Event object"
                                % line.strip())
        self['run']     = run
        self['lumi']    = lumi
        self['event']   = event
        self['dataset'] = Event.dataset
        if not self['dataset']:
            print ("No dataset is defined for '%s'. Aborting." % line.strip())
            raise RuntimeError ('Missing dataset')

    def __getattr__ (self, key):
        """Expose dictionary entries as attributes.

        Unknown names raise AttributeError (not KeyError) so that hasattr(),
        getattr() with a default, copy and pickle behave normally.
        """
        try:
            return self[key]
        except KeyError:
            raise AttributeError (key)

    def __str__ (self):
        """Return a one-line, human readable description of the event."""
        return "run = %(run)i, lumi = %(lumi)i, event = %(event)i, dataset = %(dataset)s" % self
#################
## Subroutines ##
#################
def getFileNames (event):
    """Ask DAS which files contain the lumi section of ``event``.

    Args:
        event: mapping providing the 'dataset', 'run' and 'lumi' entries
            used to build the DAS query.

    Returns:
        List of the distinct, non-empty values extracted from the DAS
        answer, in the order in which they were returned.  The list is
        empty when DAS reports a status other than 'ok'.
    """
    # Query DAS
    query = "file dataset=%(dataset)s run=%(run)i lumi=%(lumi)i | grep file.name" % event
    jsondict = das_client.get_data('https://cmsweb.cern.ch', query, 0, 0, False)
    status = jsondict['status']
    if status != 'ok':
        print ("DAS query status: %s"%(status))
        return []
    # The 'grep' filters echoed back by DAS are what get_value() needs in
    # order to pull the requested field out of each row.
    grepFilters = jsondict['mongo_query']['filters']['grep']
    files = []
    for row in jsondict['data']:
        values = list (das_client.get_value (row, grepFilters))
        if not values:
            # A row without any value has nothing to offer; skip it instead
            # of failing with an IndexError.
            continue
        name = values[0]
        if len (name) > 0 and name not in files:
            files.append (name)
    return files
def fullCPMpath():
    """Return the full path of copyPickMerge_cfg.py.

    The file is looked up below $CMSSW_BASE first and below
    $CMSSW_RELEASE_BASE second; the first existing path is returned.

    Raises:
        RuntimeError: if $CMSSW_BASE is not set, or if the file exists in
            neither area.
    """
    if not os.environ.get ('CMSSW_BASE'):
        raise RuntimeError ("CMSSW Environment not set")
    for variable in ('CMSSW_BASE', 'CMSSW_RELEASE_BASE'):
        base = os.environ.get (variable)
        if not base:
            # An unset variable must not be formatted into a bogus
            # "None/src/..." path.
            continue
        retval = "%s/src/PhysicsTools/Utilities/configuration/copyPickMerge_cfg.py" \
                 % base
        if os.path.exists (retval):
            return retval
    raise RuntimeError ("Could not find copyPickMerge_cfg.py")
def guessEmail():
    """Guess the user's e-mail address as '<login name>@<domain>'.

    The domain is made of the last two dot-separated components of the
    local host name.  This is only a best-effort default; it can be
    overridden with the '--email' option.
    """
    # Imported locally: the standard library is used in place of the
    # deprecated 'commands' module, which no longer exists in Python 3.
    import getpass
    import socket
    try:
        user = getpass.getuser()
    except (KeyError, ImportError, OSError):
        # No login name can be determined (e.g. no passwd entry); a guess
        # must not abort the program.
        user = ''
    domain = '.'.join (socket.gethostname().split ('.')[-2:])
    return '%s@%s' % (user, domain)
def setupCrabDict (options):
    """Collect the values needed to generate the CRAB job files.

    Args:
        options: parsed command-line options; the attributes 'base',
            'email' and 'crabCondor' are read.

    Returns:
        Dictionary holding the file names derived from ``options.base``
        ('runEvent', 'output', 'crabcfg', 'json'), the path returned by
        fullCPMpath() ('copyPickMerge'), the dataset, the e-mail address,
        a time stamp ('WorkArea'), the scheduler name and an always empty
        'useServer' entry.
    """
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    prefix = options.base
    settings = {
        'runEvent'      : '%s_runEvents.txt' % prefix,
        'copyPickMerge' : fullCPMpath(),
        'output'        : '%s.root' % prefix,
        'crabcfg'       : '%s_crab.py' % prefix,
        'json'          : '%s.json' % prefix,
        'dataset'       : Event.dataset,
        'email'         : options.email,
        'WorkArea'      : timestamp,
    }
    # Condor is used only on explicit request; remoteGlidein is the default.
    settings['scheduler'] = 'condor' if options.crabCondor else 'remoteGlidein'
    # No server setting is emitted for either scheduler.
    settings['useServer'] = ''
    return settings
# CRAB3 configuration template.  The main program fills it with
# '%'-formatting from the dictionary built by setupCrabDict() and writes the
# result to the '<base>_crab.py' file.  The template consumes the keys
# WorkArea, copyPickMerge, runEvent, output, dataset and json; the request
# name, splitting, units per job and storage site are hard-coded below.
crabTemplate = '''
## Edited By Raman Khurana
##
## CRAB documentation : https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrab
##
## CRAB 3 parameters : https://twiki.cern.ch/twiki/bin/view/CMSPublic/CRAB3ConfigurationFile#CRAB_configuration_parameters
##
## Once you are happy with this file, please run
## crab submit
## In CRAB3 the configuration file is in Python language. It consists of creating a Configuration object imported from the WMCore library:
from WMCore.Configuration import Configuration
config = Configuration()
## Once the Configuration object is created, it is possible to add new sections into it with corresponding parameters
config.section_("General")
config.General.requestName = 'pickEvents'
config.General.workArea = 'crab_pickevents_%(WorkArea)s'
config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = '%(copyPickMerge)s'
config.JobType.pyCfgParams = ['eventsToProcess_load=%(runEvent)s', 'outputFile=%(output)s']
config.section_("Data")
config.Data.inputDataset = '%(dataset)s'
config.Data.inputDBS = 'global'
config.Data.splitting = 'LumiBased'
config.Data.unitsPerJob = 5
config.Data.lumiMask = '%(json)s'
#config.Data.publication = True
#config.Data.publishDbsUrl = 'phys03'
#config.Data.publishDataName = 'CRAB3_CSA_DYJets'
#config.JobType.allowNonProductionCMSSW=True
config.section_("Site")
## Change site name accordingly
config.Site.storageSite = "T2_US_Wisconsin"
'''
########################
## ################## ##
## ## Main Program ## ##
## ################## ##
########################
if __name__ == "__main__":
    # Command-line driver: parse the options, build the list of requested
    # events and then either write the CRAB job files or print (and
    # optionally run) an interactive edmCopyPickMerge command.

    # The guessed address is needed before parsing because it is shown in
    # the help text of '--email'.
    email = guessEmail()
    parser = optparse.OptionParser ("Usage: %prog [options] dataset events_or_events.txt", description='''This program
facilitates picking specific events from a data set. For full details, please visit
https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookPickEvents ''')
    parser.add_option ('--output', dest='base', type='string',
                       default='pickevents',
                       help='Base name to use for output files (root, JSON, run and event list, etc.; default "%default")')
    parser.add_option ('--runInteractive', dest='runInteractive', action='store_true',
                       help = 'Call "cmsRun" command if possible. Can take a long time.')
    parser.add_option ('--printInteractive', dest='printInteractive', action='store_true',
                       help = 'Print "cmsRun" command instead of running it.')
    parser.add_option ('--maxEventsInteractive', dest='maxEventsInteractive', type='int',
                       default=20,
                       help = 'Maximum number of events allowed to be processed interactively.')
    parser.add_option ('--crab', dest='crab', action='store_true',
                       help = 'Force CRAB setup instead of interactive mode')
    parser.add_option ('--crabCondor', dest='crabCondor', action='store_true',
                       help = 'Tell CRAB to use Condor scheduler (FNAL or OSG sites).')
    parser.add_option ('--email', dest='email', type='string',
                       default='',
                       help="Specify email for CRAB (default '%s')" % email )
    (options, args) = parser.parse_args()

    # A dataset plus at least one event (or one event file) is required.
    if len(args) < 2:
        parser.print_help()
        sys.exit(0)

    if not options.email:
        options.email = email

    # The first positional argument is the dataset; the rest are events.
    Event.dataset = args.pop(0)
    commentRE = re.compile (r'#.+$')
    colonRE   = re.compile (r':')
    eventList = []
    if len (args) > 1 or colonRE.search (args[0]):
        # events are coming in from the command line
        for piece in args:
            try:
                event = Event (piece)
            except RuntimeError:
                raise RuntimeError ("'%s' is not a proper event" % piece)
            eventList.append (event)
    else:
        # read events from file; unparsable lines are reported and skipped
        with open (args[0], 'r') as source:
            for line in source:
                line = commentRE.sub ('', line)
                try:
                    event = Event (line)
                except RuntimeError:
                    print ("Skipping '%s'." % line.strip())
                    continue
                eventList.append (event)

    if not eventList:
        print ("No events defined. Aborting.")
        sys.exit()

    # Too many events for an interactive job: switch to CRAB.
    if len (eventList) > options.maxEventsInteractive:
        options.crab = True

    if options.crab:

        ##########
        ## CRAB ##
        ##########
        if options.runInteractive:
            raise RuntimeError ("This job cannot be run interactively, but rather by crab. Please call without the '--runInteractive' flag or increase the '--maxEventsInteractive' value.")
        runsAndLumis = [ (event.run, event.lumi) for event in eventList]
        # Named so that it does not hide the imported 'json' module.
        lumiMask = LumiList (lumis = runsAndLumis)
        eventsToProcess = '\n'.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        crabDict = setupCrabDict (options)
        lumiMask.writeJSON (crabDict['json'])
        # 'with' guarantees that both files are flushed and closed.
        with open (crabDict['runEvent'], 'w') as target:
            target.write ("%s\n" % eventsToProcess)
        with open (crabDict['crabcfg'], 'w') as target:
            target.write (crabTemplate % crabDict)
        print ("Please visit CRAB twiki for instructions on how to setup environment for CRAB:\nhttps://twiki.cern.ch/twiki/bin/viewauth/CMS/SWGuideCrab\n")
        if options.crabCondor:
            print ("You are running on condor. Please make sure you have read instructions on\nhttps://twiki.cern.ch/twiki/bin/view/CMS/CRABonLPCCAF\n")
            if not os.path.exists ('%s/.profile' % os.environ.get('HOME')):
                print ("** WARNING: ** You are missing ~/.profile file. Please see CRABonLPCCAF instructions above.\n")
        print ("Setup your environment for CRAB and edit %(crabcfg)s to make any desired changed. Then run:\n\ncrab submit -c %(crabcfg)s\n" % crabDict)

    else:

        #################
        ## Interactive ##
        #################
        files = []
        eventPurgeList = []
        for event in eventList:
            eventFiles = getFileNames (event)
            # NOTE(review): a single '[]' string is treated as the "no file
            # found" answer - confirm against the DAS client output.
            if eventFiles == ['[]']: # event not contained in the input dataset
                print ("** WARNING: ** According to a DAS query, run = %i; lumi = %i; event = %i not contained in %s. Skipping."%(event.run,event.lumi,event.event,event.dataset))
                eventPurgeList.append( event )
            else:
                files.extend( eventFiles )
        # Purge events
        for event in eventPurgeList:
            eventList.remove( event )
        # Purge duplicate files while keeping the first-seen order
        fileSet = set()
        uniqueFiles = []
        for filename in files:
            if filename in fileSet:
                continue
            fileSet.add (filename)
            uniqueFiles.append (filename)
        source = ','.join (uniqueFiles) + '\n'
        eventsToProcess = ','.join(
            sorted( [ "%d:%d" % (event.run, event.event) for event in eventList ] ) )
        command = 'edmCopyPickMerge outputFile=%s.root \\\n eventsToProcess=%s \\\n inputFiles=%s' \
                  % (options.base, eventsToProcess, source)
        print ("\n%s" % command)
        # The command is always printed; it is executed only on request.
        if options.runInteractive and not options.printInteractive:
            os.system (command)